Am trying to scrape info from a site.
The site have like this
127 East Zhongshan No 2 Rd; 中山东二路127号
But when i try to scrap it & echo it then it will show
127 East Zhongshan No 2 Rd; ä¸å±±ä¸äºè·¯127å·
I also try UTF-8
There is my php code
now please help me for solve this problem.
function GrabPage($site){
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
curl_setopt($ch, CURLOPT_TIMEOUT, 40);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_URL, $site);
ob_start();
return curl_exec ($ch);
ob_end_clean();
curl_close ($ch);
}
$GrabData = GrabPage($site);
$dom = new DOMDocument();
@$dom->loadHTML($GrabData);
$xpath = new DOMXpath($dom);
$mainElements = array();
$mainElements = $xpath->query("//div[@class='col--one-whole mv--col--one-half wv--col--one-whole'][1]/dl/dt");
foreach ($mainElements as $Names2) {
$Name2 = $Names2->nodeValue;
echo "$Name2";
}
答案 0 :(得分:1)
First off, you need to set the charset before anything else on top of PHP file:
header('Content-Type: text/html; charset=utf-8');
You need to convert the html markup you got with mb_convert_encoding
:
@$dom->loadHTML(mb_convert_encoding($GrabData, 'HTML-ENTITIES', 'UTF-8'));
答案 1 :(得分:0)
First thing is to see if the captured HTML source is properly encoded. If yes try
utf8_decode($Name2)
This should get your string ready for saving as well as printing