我尝试使用 file_get_contents 抓取下面这个页面的内容:
页面:http://www.sapporo-keihan.jp/maruyama/outline
/**
 * Download a page and parse it into a DOM document.
 *
 * The raw HTML is obtained through cache()->rememberForever() keyed by the
 * URL, so the HTTP request is only issued by the closure passed to the cache.
 *
 * @param string $url Absolute URL of the page to fetch.
 *
 * @return \DOMDocument The parsed document.
 *
 * @throws \RuntimeException If the page cannot be downloaded or the cached
 *                           value is not a string.
 */
function pageContent(string $url): \DOMDocument
{
    $html = cache()->rememberForever($url, function () use ($url) {
        // Ask for HTML explicitly via the Accept request header.
        $opts = [
            "http" => [
                "method" => "GET",
                "header" => "Accept: text/html\r\n",
            ],
        ];
        $context = stream_context_create($opts);
        $file = file_get_contents($url, false, $context);

        // Throw instead of returning false: a returned value is handed to
        // rememberForever() and would presumably be cached permanently, turning
        // a transient network failure into a permanent one.
        if ($file === false) {
            throw new \RuntimeException("Unable to fetch {$url}");
        }

        return $file;
    });

    // Guard against a non-string value already sitting in the cache
    // (e.g. a failure stored by an earlier run).
    if (!is_string($html)) {
        throw new \RuntimeException("Cached content for {$url} is not a string");
    }

    // Detect the source encoding from the candidate list, normalise to UTF-8,
    // then turn every non-ASCII character into a numeric entity so that
    // DOMDocument::loadHTML() receives pure ASCII. This replaces the
    // 'HTML-ENTITIES' pseudo-encoding, which is deprecated since PHP 8.2.
    $utf8 = mb_convert_encoding($html, 'UTF-8', 'ASCII, JIS, UTF-8, EUC-JP, SJIS');
    $encoded = mb_encode_numericentity($utf8, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $parser = new \DOMDocument();

    // Collect markup warnings internally instead of emitting them; real-world
    // HTML is rarely valid.
    libxml_use_internal_errors(true);
    $parser->loadHTML($encoded);

    // Drop the collected warnings so the libxml buffer does not grow with
    // every parsed page.
    libxml_clear_errors();

    return $parser;
}
// Fetch and parse the outline page; despite its name, $html holds the
// \DOMDocument returned by pageContent().
$html = pageContent("http://www.sapporo-keihan.jp/maruyama/outline");
// Query the parsed document for every <body> element.
$path = new \DOMXPath($html);
$catch = $path->query("//body");
// Each iteration overwrites $site, so only the last match is kept; if the
// query matches nothing, $site is never defined.
foreach ($catch as $found){
// NOTE(review): nodeValue of an element appears to be the concatenated text
// of all its descendants, which would include the contents of <style>
// elements inside <body> - confirm against the DOMNode documentation.
$site = trim($found->nodeValue);
}
但是我无法得到正确的内容:除了正文之外,返回的结果里还包含了页面的 CSS。这个函数有什么问题?是什么导致了这个问题?谢谢!
答案 0 :(得分:3)
似乎对于此 URL,服务器默认会返回 CSS。
在请求中指定 HTTP 标头 Accept: text/html 即可解决此问题。
您可以通过以下方式进行操作:
// Build a stream context whose GET request carries an explicit
// "Accept: text/html" header, then download $url through it.
$context = stream_context_create([
    "http" => [
        "method" => "GET",
        "header" => "Accept: text/html\r\n",
    ],
]);
$file = file_get_contents($url, false, $context);