大家好,我使用 cURL 请求一个外部 Web 服务器,但响应的类型是 HTML。我已经能够把它转换成 JSON(超过 4000 行),但不知道如何直接取出其中包含我所需结果的那一行。有什么想法吗?
这是我的 cURL 代码:
require_once('getJson.php');

// Fetch the lookup page. CURLOPT_RETURNTRANSFER makes curl_exec() hand the
// body back as a string instead of printing it.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://www.reputationauthority.org/domain_lookup.php?ip=website.com&Submit.x=9&Submit.y=5&Submit=Search');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
$data = curl_exec($ch);
$httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Capture the error text before the handle is closed.
$curlError = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on a transport failure (timeout, DNS, ...).
// Stop here rather than feeding a non-document to the HTML parser.
if ($data === false || $data === '') {
    throw new RuntimeException("Request failed (HTTP {$httpcode}): {$curlError}");
}

// The body is passed to the parser untouched: wrapping it in the literal
// strings '<<<EOF' / 'EOF' would only inject stray text into the document.
$json = new GetJson();
header("Content-Type: text/plain");
$res = json_encode($json->html_to_obj($data), JSON_PRETTY_PRINT);
$myArray = json_decode($res, true);
以下是 getJson.php 的内容:
/**
 * Converts an HTML document into a nested PHP array suitable for json_encode().
 */
class GetJson
{
    /**
     * Parses an HTML string and returns its root element as a nested array.
     *
     * @param string $html Raw HTML markup.
     * @return array|null Tree rooted at the document element, or null when the
     *                    input is empty or yields no root element.
     */
    public function html_to_obj($html)
    {
        $html = (string) $html;
        if ($html === '') {
            // DOMDocument::loadHTML() rejects an empty string; nothing to parse.
            return null;
        }

        // Collect parser errors internally instead of emitting a warning for
        // every piece of invalid markup.
        $previous = libxml_use_internal_errors(true);
        try {
            $dom = new DOMDocument();
            $dom->loadHTML($html);
            $root = $dom->documentElement;
        } finally {
            // Drop the buffered errors so they do not pile up across calls,
            // and give the caller back the error mode it had before.
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $root === null ? null : $this->element_to_obj($root);
    }

    /**
     * Recursively converts a DOM element into an array.
     *
     * The result has a "tag" key, one key per attribute, an optional "html"
     * key holding text content, and an optional "children" list.
     *
     * @param DOMNode|null $element Node to convert.
     * @return array|null Array for element nodes, null for anything else.
     */
    public function element_to_obj($element)
    {
        if (!($element instanceof DOMNode) || $element->nodeType != XML_ELEMENT_NODE) {
            return null;
        }

        $obj = ["tag" => $element->tagName];
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                // When text is split by child elements, the last run wins.
                $obj["html"] = $subElement->wholeText;
            } else {
                // Non-element children (e.g. comments) are stored as null so
                // that the positions of the remaining children stay stable.
                $obj["children"][] = $this->element_to_obj($subElement);
            }
        }
        return $obj;
    }
}
我不想逐层遍历才能到达第 2175 行(像这样写:$data['children'][2]['children'][7]['children'][3]['children'][1]['children'][1]['children'][0]['children'][1]['children'][0]['children'][1]['children'][2]['children'][0]['children'][0]['html'],对我来说不是个好主意),我希望能直接定位到它。
答案 0 :(得分:0)
如果返回的 HTML 每次都具有一致的结构,并且您只想从中取出某一个特定的值,则可以使用正则表达式来解析 HTML 并找到所需的部分。这是把整个文档转成数组之外的另一种做法。我之前用过这种技术来解析 HTML 文档并查找某个特定的项目。下面是一个简单的例子。由于您尚未说明所需数据的确切性质,您需要根据自己的需要进行调整,可能还需要进行多层匹配才能找到正确的部分:
$data = curl_exec($ch);
// curl_exec() returns false on failure (or true when CURLOPT_RETURNTRANSFER
// is not set); only a string body can be split into lines.
// Split the output into an array that we can loop through line by line
$array = is_string($data) ? preg_split('/\r?\n/', $data) : [];
// For each line in the output
foreach ($array as $element) {
    // See if the line contains a hyperlink. HTML tag and attribute names are
    // case-insensitive, and href need not be the first attribute of <a>.
    if (preg_match('/<a\s[^>]*\bhref/i', $element)) {
        // ...[do something here, e.g. store the data retrieved, or do more matching to find something within it]...
    }
}