我试图从当地政府那里获取数据。我想要的是来自儿童收养办公室的地址。在巴西,所有收养都通过政府。所以我有一个办公室的URL,还有2或3千多个。但如果我能设法得到一个,其他人将很容易。 我做了很多尝试,下面我展示了三个。
问题可能与刷新页面的Javascript(可能是Ajax)有关。
注意:我不是PHP开发人员。
首次尝试
echo '<html><head></head><body>';
echo '<h1>Scraper PHP GET 1</h1>';
echo ini_get("allow_url_fopen");
echo ini_get("allow_url_fopen");
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$html = file_get_contents($url);
var_dump($html);
echo '</body></html>';
// Output
// 11
// Warning:
file_get_contents(http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?
transacao=CONSULTA&vara=2673) [function.file-get-contents]: failed to open stream: HTTP
request failed! HTTP/1.1 404 Not Found in /home/rsl/www/sc01_get.php on line 14
// bool(false)
第二次尝试
echo '<html><head></head><body>';
echo '<h1>Scraper PHP CURL 3</h1>';
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$curl = curl_init($url);
@curl_setopt($curl, CURLOPT_POSTFIELDS, "foo");
@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
@curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");;
$html=@curl_exec($curl);
if (!$html) {
echo "<br />cURL error number:" .curl_errno($curl);
echo "<br />cURL error:" . curl_error($curl);
exit;
}
else{
echo '<br>begin HTML[';
echo $html;
echo '<br>]end html ';
}
echo '</body></html>';
// Output
// 1
第三次尝试
function curl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6');
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_REFERER, "http://www.windowsphone.com");
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
echo '<html><head></head><body>';
echo '<h1>Scraper PHP CURL 5</h1>';
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$curl = curl_init($url);
@curl_setopt($curl, CURLOPT_POSTFIELDS, "foo");
@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
@curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");;
$html=@curl($curl);
if (!$html) {
echo "<br />cURL error number:" .curl_errno($curl);
echo "<br />cURL error:" . curl_error($curl);
exit;
}
else{
echo '<br>begin HTML[';
echo $html;
echo '<br>]end html ';
}
echo '</body></html>';
// Output
// cURL error number:0
// cURL error: