我有一个函数,我使用array_walk函数使用Title函数扫描Array [link]中的值并将其放在相应数组的[title]中
例如The Qlick => [link] => “http://www.theqlick.com” [title] => Qlick
$links = Array();
$URL = 'http://www.theqlick.com'; // change it for urls to grab
// grabs the urls from URL
$file = file_get_html($URL);
foreach ($file->find('a') as $theelement)
{
$abs_url = url_to_absolute($URL, $theelement->href);
if (!empty($abs_url))
$links[] = $abs_url;
}
function Titles() {
global $links;
$str = implode('',array_map('file_get_contents',$links));
error_reporting(E_ERROR | E_PARSE);
$titles = Array();
if( strlen( $str )>0 ) {
$titles[] = preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
return $title;
return $links;
} }
$newArray = array();
$title = array_walk($links, 'Titles');
foreach($links as $key => $val ){
$newArray[$key] = array( 'link' => $val, 'title' => $title);
}
print_r($newArray);
My result when var_dump is used:
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
}
[1]=>
array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
}
}
Array
(
[0] => Array
(
[link] => http://www.theqlick.com/index.php
[title] => 1
)
[1] => Array
(
[link] => http://www.theqlick.com/qlickdates.php
[title] => 1
)
[2] => Array
(
[link] => http://www.theqlick.com/festivalfreaks.html
[title] => 1
)
[3] => Array
(
[link] => http://www.theqlick.com/2kcm.php
[title] => 1
)
[4] => Array
(
[link] => http://www.theqlick.com/index3.php
[title] => 1
)
[5] => Array
(
[link] => http://www.theqlick.com/index2.php
[title] => 1
)
)
答案 0 :(得分:0)
将功能结束更改为:
if( strlen( $str )>0 ) {
$titles = Array();
preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
if (count($titles) > 1) {
return $titles[1];
}
}
return '';
preg_match_all
返回匹配数。这将返回您找到的第一个匹配项,如果没有标题或没有要搜索的文本,则返回空字符串。
我没有测试过,所以可能需要调试。
编辑添加:
$links = Array();
$URL = 'http://www.theqlick.com'; // change it for urls to grab
// grabs the urls from URL
function Titles($link) {
$str = file_get_contents($link);
if( strlen( $str )>0 ) {
preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
if (count($titles) > 1) {
return $titles[1];
}
}
return '';
}
$file = file_get_html($URL);
foreach ($file->find('a') as $theelement) {
$abs_url = url_to_absolute($URL, $theelement->href);
if (!empty($abs_url)) {
$links[] = $abs_url;
}
}
$output = Array();
foreach ($links as $thisLink) {
$output[] = array("link" => $thisLink, "title" => Titles($thisLink));
}
(同样,这是非常未经测试的)
您生成链接列表;然后,逐步浏览该列表,并为每个列表获取页面标题。你一次只做一个,所以跟踪它的位置要容易得多。