使用 java.net.URLConnection 读取 HTML 页面时,得到的只是页面的 HTML 源代码,而不是页面上实际显示的数据。由于页面上的数据是动态生成的(由脚本在浏览器中填充),原始 HTML 源码里并不包含这些显示出来的数据。
是否还有其他方法可以获取html页面上显示的数据?
我的代码是这样的:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Downloads a web page over a {@link URLConnection} and prints its raw text to standard output.
 *
 * <p>Only the document returned by the server is printed. No scripts are executed, so any
 * content that the page fills in at runtime through JavaScript will not appear in the output.
 */
public class Example {

    /**
     * Fetches the hard-coded report page and echoes it line by line to {@code System.out}.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        System.setProperty("java.net.useSystemProxies", "true");

        // Make a URL to the web page
        URL url = new URL("https://rci-dtengine.rnd.ki.sw.ericsson.se/root//proj/cpptemp/jcat-cpp/DI/megatron/DT/INT_PA284/20170331_152720/20170331_152723/index.html");

        // Get the input stream through URL Connection
        URLConnection con = url.openConnection();

        // NOTE: this retrieves the static HTML only. Values that the page populates from
        // separately loaded scripts are not part of this response; to obtain them, the
        // script/data resources referenced by the page would have to be fetched as well
        // (or the page rendered by a tool that executes JavaScript).

        // try-with-resources guarantees the stream and reader are closed even when reading fails.
        try (InputStream is = con.getInputStream();
             BufferedReader br = new BufferedReader(new InputStreamReader(is, charsetOf(con)))) {
            String line;
            // read each line and write to System.out
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Determines the charset used to decode the response body.
     *
     * <p>Uses the {@code charset} parameter of the Content-Type header when it is present and
     * names a supported charset; otherwise falls back to UTF-8 rather than the platform default,
     * so the output does not depend on the machine the program runs on.
     */
    private static Charset charsetOf(URLConnection con) {
        final String prefix = "charset=";
        String contentType = con.getContentType();
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, prefix, 0, prefix.length())) {
                    continue;
                }
                String name = param.substring(prefix.length()).trim();
                // The parameter value may be a quoted string, e.g. charset="utf-8".
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    return Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Malformed or unsupported charset name: fall through to the UTF-8 default.
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}
输出:
<!DOCTYPE html>
<html>
<head>
<link rel="shortcut icon" href="img/jcat_m3.png"/>
<title>JCAT Test Report</title>
<link rel="stylesheet" type="text/css" href="css/jquery.fancybox.css" />
<link rel="stylesheet" type="text/css" href="css/top.css" media="screen" />
<link rel="stylesheet" type="text/css" href="css/jquery-ui.min.css" />
<link rel="stylesheet" type="text/css" href="css/testsuite.css" />
<script src="js/jquery-1.11.1.min.js"></script>
<script src="js/jquery-ui.min.js"></script>
<script src="js/jquery.fancybox.pack.js"></script>
<script src="js/jquery.storage.js"></script>
<script src="js/stupidtable.min.js"></script>
<script src="js/common.js"></script>
<script src="js/top.js"></script>
<script src="js/testsuite.js"></script>
<script>
window.onerror = function () {
alert("Failed to load data files as the format is corrupt!");
};
function receiveMessage(event) {
if (event.data !== "reload from settings") {
return;
}
window.location.reload();
}
window.addEventListener("message", receiveMessage, false);
</script>
<script src="json/suite.data.js"></script>
<script src="json/top.data.js"></script>
<script src="json/bulletin.message.data.js"></script>
</head>
<body>
<div class='top'></div>
<div id="bulletin">
<div id="bulletinBar">
<img id="bulletinSwitch"></img>
<span id="bulletinHint" style="display:none">hide bulletin</span>
</div>
<div id="bulletinFrame" class="ui-widget-content">
<div id="bulletinContent"></div>
</div>
</div>
<div id="autoRefresh">
<span id="refreshCountDown" style="display:none"></span>
<input id='autoRefreshBtn' type='button' value='enable auto refresh'></input>
</div>
<div id="content">
<div id="sumBlock">
<h3>Report Summary</h3>
<table id="suiteInfo" summary="Suite summary information">
<tbody>
<tr>
<td width="130px">Suite name:</td>
<td id="suiteName"></td>
</tr>
<tr>
<td>Run by:</td>
<td id="runBy"></td>
</tr>
<tr>
<td>Time started:</td>
<td id="startTime" class="timestamp"></td>
</tr>
<tr>
<td>Time finished:</td>
<td id="endTime" class="timestamp"></td>
</tr>
<tr>
<td>Duration:</td>
<td id="duration"></td>
</tr>
</tbody>
</table>
</div>
<div id="staBlock">
<h3>Statistics</h3>
<table id="statistics" summary="Suite statistic">
<tbody>
<tr>
<td width="100px">Passed TCs:</td>
<td id="tcPassed" width="50px"></td>
<td width="120px">Passed configs:</td>
<td id="configTcPassed" width="50px"></td>
</tr>
<tr>
<td>Failed TCs:</td>
<td id="tcFailed"></td>
<td >Failed configs:</td>
<td id="configTcFailed"></td>
</tr>
<tr>
<td>Error TCs:</td>
<td id="tcError"></td>
<td>Error configs:</td>
<td id="configTcError"></td>
</tr>
<tr>
<td>Skipped TCs:</td>
<td id="tcSkipped"></td>
<td>Skipped configs:</td>
<td id="configTcSkipped"></td>
</tr>
<tr>
<td>Excluded TCs:</td>
<td id="tcExcluded"></td>
<td>Executed configs:</td>
<td id="configTcRun"></td>
</tr>
<tr>
<td>Inconclusive TCs:</td>
<td id="tcInconclusive"></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Executed TCs:</td>
<td id="tcRun"></td>
<td></td>
<td></td>
</tr>
<tr>
<td>UnExecuted TCs:</td>
<td id="tcNotRun"></td>
<td>TCs & configs:</td>
<td id="tcTotalRun"></td>
</tr>
</tbody>
</table>
</div>
<div id="testCases">
<input placeholder="Search..." type="text" id="filterText" maxlength="200" style="margin-left:5px;"/>
<div id='viewSwitch' style='display:none'>
| <span id="nestedFunc" style='display:none'> Show Nested Suites<input id='nestedSwitch' onchange="switchSuiteTable(this)" type="checkbox" value="Nested"></span>
<span id="groupedFunc" style='display:none'> Show Grouped Suite<input id='groupedSwitch' onchange="switchSuiteTable(this)" type="checkbox" value="Grouped"></span>
Auto-unfold Errors<input id='autoUnfold' onchange="setAutoUnfold()" type="checkbox" value="autoUnfold" checked="checked">
<input id="unfoldFunc" type="button" value="Unfold" onclick="$('tr.folded').click()">
<input id="foldFunc" type="button" value="Fold" onclick="$('tr.unfolded').click()">
</div>
| <span id="filter_icondown" style="color: #99cc00">►</span> <span id="filter_iconright" style="color: #99cc00; display: none">▼</span>
<a id="filterHide" href="javascript:hideUnhideTable('filter')">Filters</a>
<div id="jcat_label">
| <span id="label_text">Labels:</span>
<select id="label_select">
</select>
</div>
<table id="filter" style="display: none;">
<tr>
<td width="120px">Passed TCs/configs:</td>
<td width="50px"><a id="label_passed" href="javascript:hideUnhideRows('passed')">Hide</a></td>
<td width="160px">Skipped TCs/configs:</td>
<td width="50px"><a id="label_skipped" href="javascript:hideUnhideRows('skipped')">Hide</a></td>
<td width="160px">All testcases:</td>
<td width="50px"><a id="label_testcases" href="javascript:hideUnhideRows('testcases')">Hide</a></td>
</tr>
<tr>
<td>Failed TCs/configs:</td>
<td><a id="label_failed" href="javascript:hideUnhideRows('failed')">Hide</a></td>
<td>Excluded TCs/configs:</td>
<td><a id="label_excluded" href="javascript:hideUnhideRows('excluded')">Hide</a></td>
<td>All configuration methods:</td>
<td><a id="label_cfgs" href="javascript:hideUnhideRows('cfgs')">Hide</a></td>
</tr>
<tr>
<td>Error TCs/configs:</td>
<td><a id="label_error" href="javascript:hideUnhideRows('error')">Hide</a></td>
<td>Inconclusive TCs/configs:</td>
<td><a id="label_inconc" href="javascript:hideUnhideRows('inconc')">Hide</a></td>
<td>All reruned testcases:</td>
<td><a id="label_rerun" href="javascript:hideUnhideRows('rerun')">Show</a></td>
</tr>
</table>
<table id="suiteTable" class='suite'>
<thead>
<tr>
<th data-sort="int" id="tcIndex" style="display:none">Index</th>
<th data-sort="string">ID</th>
<th data-sort="string">Name</th>
<th data-sort="string">Type</th>
<th data-sort="string">Heading</th>
<th data-sort="int">State</th>
<th data-sort="string">Additional Result Info</th>
<th data-sort="string" style="display:none;">Start Time</th>
<th data-sort="string" style="display:none;">End Time</th>
<th data-sort="string">Duration</th>
<th data-sort="string">Fetched Logs</th>
<th data-sort="string">Labels</th>
</tr>
</thead>
<tbody></tbody>
</table>
<table id="nestedTable" class='suite Nested' style="display:none">
<thead>
<tr>
<th id="tcIndex" style="display:none">Index</th>
<th>ID</th>
<th>Name</th>
<th>Type</th>
<th>Heading</th>
<th>State</th>
<th>Additional Result Info</th>
<th style="display:none;">Start Time</th>
<th style="display:none;">End Time</th>
<th>Duration</th>
<th>Fetched Logs</th>
<th>Labels</th>
</tr>
</thead>
<tbody></tbody>
</table>
<table id="groupedTable" class='suite Grouped' style="display:none">
<thead>
<tr>
<th id="tcIndex" style="display:none">Index</th>
<th>ID</th>
<th>Name</th>
<th>Type</th>
<th>Heading</th>
<th>State</th>
<th>Additional Result Info</th>
<th style="display:none;">Start Time</th>
<th style="display:none;">End Time</th>
<th>Duration</th>
<th>Fetched Logs</th>
<th>Labels</th>
</tr>
</thead>
<tbody></tbody>
</table>
</div>
</div>
</body>
</html>