LinkedIn
没有 link 标签……使用默认位置。
http://www.linkedin.com/favicon.ico
Twitter
<link href="/phoenix/favicon.ico" rel="shortcut icon" type="image/x-icon" />
Pinterest
<link rel="icon" href="http://passets-cdn.pinterest.com/images/favicon.png" type="image/x-icon" />
Facebook
<link rel="shortcut icon" href="https://s-static.ak.facebook.com/rsrc.php/yi/r/q9U99v3_saj.ico" />
我已经确定找到图标的唯一方法是检查来源并查看链接的位置。
是否有可以解析此信息的功能?或者是否有一个很好的策略来使用正则表达式手动将其拉出来。
我将解析html服务器端以获取此信息。
思路:
正则表达式示例:Try Here。似乎很容易......但这是一个起点。
<link\srel="[Ss]hortcut [Ii]con"\shref="(.+)"(.+)>
答案 0 :(得分:3)
使用解析器:
// Parse the page markup held in $input and pick the favicon URL from its
// <link> elements.
$dom = new DOMDocument();
// Real-world HTML is rarely valid; libxml's parse warnings are deliberately
// silenced so that a sloppy page does not abort the lookup.
@$dom->loadHTML($input);
$links = $dom->getElementsByTagName('link');
// Conventional location, kept when no <link> element declares an icon.
$favicon = "/favicon.ico";
foreach ($links as $item) {
    // rel is a space-separated, case-insensitive list of link types, so both
    // rel="icon" and rel="shortcut icon" must be accepted. Comparing whole
    // tokens keeps values such as "apple-touch-icon" out.
    $relTokens = preg_split('/\s+/', strtolower(trim($item->getAttribute("rel"))));
    if (!in_array("icon", $relTokens, true)) {
        continue;
    }
    $href = trim($item->getAttribute("href"));
    if ($href !== '') {
        // First icon link with a usable href wins.
        $favicon = $href;
        break;
    }
}
// You now have your $favicon
答案 1 :(得分:0)
PHP 5 DOMDocument 的替代方案:直接使用正则表达式
到目前为止,这适用于所有情况。
// Matches a <link> tag whose rel is "icon" or "shortcut icon" (case-insensitive)
// and captures its href value in group 1.
//  - The lookahead lets rel appear before or after href. It only requires that
//    the rel attribute is followed by whitespace, "/" or ">", so a tag that ends
//    right after rel="icon" is matched too.
//  - The tail is limited to the current tag ([^>]*>) so that several tags on one
//    line are not swallowed into a single match.
$pattern = '#<link\s+(?=[^>]*rel="(?:shortcut\s)?icon"(?=[\s/>]))(?:[^>]*href="(.+?)")[^>]*>#i';
答案 2 :(得分:0)
您必须处理几个问题,例如网站重定向和各种特殊情况。以下是我的做法,大约能取到 90% 网站的 favicon:
<?
/*
nws-favicon : Get site's favicon using various strategies
This script is part of NWS
https://github.com/xaccrocheur/nws/
*/
/**
 * Tell whether $imgUrl points at something PHP can read as an image.
 *
 * @param string $imgUrl Path or URL handed to getimagesize().
 * @return bool true when getimagesize() yields a truthy result, false otherwise.
 */
function CheckImageExists($imgUrl) {
    // Warnings raised for unreadable or non-image targets are silenced; only
    // the success/failure outcome matters here.
    $imageInfo = @getimagesize($imgUrl);

    return $imageInfo ? true : false;
}
/**
 * Locate a site's favicon, trying the conventional location first and the
 * home page markup second.
 *
 * Strategy:
 *  1. Reduce the URL's host to its last two labels and probe
 *     http://<domain>/favicon.ico.
 *  2. If that probe answers 404 or 301, download http://<domain> and inspect
 *     the <link> elements inside <head>: the last href containing
 *     "favicon.ico" wins; failing that, the first <link> whose rel lists the
 *     "icon" token is used.
 *  3. The chosen URL must be readable as an image (CheckImageExists);
 *     otherwise the fallback path is returned.
 *
 * Network and parse failures are treated as "not found", never as errors.
 *
 * @param string $url Any URL on the site of interest.
 * @return string Absolute favicon URL, or the fallback path
 *                "/var/www/favicon.ico" when nothing usable was found.
 */
function getFavicon ($url) {
    $fallback_favicon = "/var/www/favicon.ico";

    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        // Relative or malformed URL: there is no site to probe.
        return $fallback_favicon;
    }

    // Drop sub-domains by keeping the last two host labels. This is only a
    // heuristic (it does not know about suffixes such as "co.uk"). IP
    // literals and single-label hosts are used unchanged.
    if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
        $domain = $host;
    } else {
        $domain = implode('.', array_slice(explode('.', $host), -2));
    }

    // Step 1: the conventional /favicon.ico location.
    $file = "http://".$domain."/favicon.ico";
    $file_headers = @get_headers($file);
    $status = 0;
    if (is_array($file_headers) && isset($file_headers[0])
        && preg_match('#^HTTP/\S+\s+(\d{3})#', $file_headers[0], $m)) {
        // Compare the numeric code so the protocol version and the casing of
        // the reason phrase do not matter.
        $status = (int) $m[1];
    }

    if ($status !== 404 && $status !== 301) {
        return CheckImageExists($file) ? $file : $fallback_favicon;
    }

    // Step 2: look for a <link> in the home page's <head>.
    $fileContent = @file_get_contents("http://".$domain);
    if (!is_string($fileContent) || trim($fileContent) === '') {
        return $fallback_favicon;
    }

    // Collect libxml's complaints about sloppy markup internally instead of
    // emitting warnings, then restore the caller's setting.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($fileContent);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
    if (!$loaded) {
        return $fallback_favicon;
    }

    $xpath = new DOMXPath($dom);
    $hrefByName = '';
    $hrefByRel = '';
    foreach ($xpath->query('//head/link[@href]') as $link) {
        $href = trim($link->getAttribute('href'));
        if ($href === '') {
            continue;
        }
        if (strpos($href, 'favicon.ico') !== false) {
            // Later matches override earlier ones.
            $hrefByName = $href;
        }
        if ($hrefByRel === '') {
            // rel is a space-separated, case-insensitive token list.
            $relTokens = preg_split('/\s+/', strtolower(trim($link->getAttribute('rel'))));
            if (in_array('icon', $relTokens, true)) {
                $hrefByRel = $href;
            }
        }
    }

    $href = ($hrefByName !== '') ? $hrefByName : $hrefByRel;
    if ($href === '') {
        return $fallback_favicon;
    }

    // Turn the href into an absolute URL. The page was fetched from the site
    // root, so relative paths are resolved against "/".
    if (preg_match('#^https?://#i', $href)) {
        $favicon = $href;
    } elseif (substr($href, 0, 2) === '//') {
        $favicon = 'http:' . $href;
    } else {
        $favicon = 'http://' . $domain . '/' . ltrim($href, '/');
    }

    // Step 3: only hand back something that really is an image.
    return CheckImageExists($favicon) ? $favicon : $fallback_favicon;
}
?>