我有以下脚本,它正在抓取页面以查找地址:
require VENDOR_DIR.'/autoload.php';
use GuzzleHttp\Psr7;
use GuzzleHttp\Exception\RequestException;
class AddrFind{
function __construct(){
$this->init();
}
function init(){
add_action('wp_footer', array($this, 'add_js'));
add_action('wp_ajax_search_address', array($this, 'search_address'));
add_action('wp_ajax_nopriv_search_address', array($this, 'search_address'));
}
function add_js(){
wp_enqueue_script('addr-script', LIB_URL.'/js/addrfind.js');
wp_localize_script('addr-script', 'AddrFind', array(
'ajaxurl'=>admin_url('admin-ajax.php')
));
}
function search_address(){
$q = $_POST['addr'];
$page = $_POST['page'];
$protocol = 'http';
if(isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off'){
$protocol = 'https';
}
$client = new \GuzzleHttp\Client();
try{
$response = $client->request('GET', 'http://postcode.map.daum.net/search', array(
'headers'=>array(
'User-Agent'=>'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
),
'query' => array(
"region_name"=>$q,
"cq"=>$q,
"cpage"=>$page,
"origin"=>$protocol.'://'.$_SERVER['SERVER_NAME'],
"isp"=>"N",
"isgr"=>"N",
"isgj"=>"N",
"ongr"=>"N",
"ongj"=>"N",
"regionid"=>"",
"regionname"=>"",
"roadcode"=>"",
"roadname"=>"",
"banner"=>"on",
"indaum"=>"off",
"vt"=>"popup",
"am"=>"on",
"ani"=>"off",
"mode"=>"transmit",
"sd"=>"on",
"hmb"=>"off",
"heb"=>"off",
"smh"=>"off",
"theme"=>"",
"sit"=>"",
"sgit"=>"",
"sbit"=>"",
"pit"=>"",
"mit"=>"",
"lcit"=>"",
"plrg"=>"",
"plrgt"=>"1.5",
"zn"=>"Y",
"CWinWidth"=>"1280",
"sptype"=>"",
"sporgq"=>"",
"fullpath"=>""
)
));
$html = $response->getBody();
$doc = new DOMDocument();
$doc->loadHTML($html);
// echo $html;
// die();
$xpath = new DOMXPath($doc);
// $addrs = $xpath->query('//dd[contains(@class, "main_address")]//a[@class="link_post"]/span[@class="txt_addr"]');
$addrs = $xpath->query('//ul[contains(@class, "list_post")]/li');
$total_pages = $xpath->query('//div[@class="inner_paging"]/span[@class="num_page"]/following-sibling::text()[1]');
$output = new stdClass;
$output->total_pages = ($total_pages->length > 0) ? preg_replace("/[^\d]/", "", $total_pages->item(0)->nodeValue):0;
$output->result = array();
foreach($addrs as $addr){
$building = (strlen($addr->getAttribute('data-building_name')) > 0) ? '('.$addr->getAttribute('data-building_name').')':'';
$address = new stdClass;
$address->road = $addr->getAttribute('data-addr').' '.$building;
$address->zip = $addr->getAttribute('data-zonecode');
$output->result[] = $address;
// $result[] = $addr->nodeValue;
//
echo $addr->getAttribute('data-addr');
}
echo json_encode($output);
}catch(RequestException $e){
print_r($e);
}
wp_die();
}
}
我的localhost中的代码没有任何问题,行echo $addr->getAttribute('data-addr');
显示它应该是什么。但在实时服务器中,该行成为破碎的字符,unicode,这里是:ìì¸ ê°ë¨êµ¬ ê°ë¨ëë¡146길 6ìì¸