Question

我有一个 Titles 函数，我想通过 array_walk 用它扫描数组中每个 [link] 的值，并把得到的页面标题放进对应数组元素的 [title] 中。

例如：The Qlick => [link] => "http://www.theqlick.com" [title] => Qlick

    // Accumulates the absolute URL of every anchor found on the start page.
    $links = Array();

$URL = 'http://www.theqlick.com'; // change it for urls to grab  

// grabs the urls from URL 
// file_get_html() and url_to_absolute() are not defined in this snippet;
// presumably they come from an HTML-parsing library and a URL helper -- confirm.
$file  = file_get_html($URL);
foreach ($file->find('a') as $theelement) 
{
    // Resolve the anchor's href against the start URL; empty results are skipped.
    $abs_url = url_to_absolute($URL, $theelement->href);
    if (!empty($abs_url))
        $links[] = $abs_url;
}

  // Downloads every URL in the global $links, joins the responses into one
  // string and runs a <title> regex over the result.
  // NOTE(review): as written it never returns the titles -- see the notes below.
  function Titles() {
  global $links;
  // One file_get_contents() call per link; all bodies are concatenated without a separator.
  $str = implode('',array_map('file_get_contents',$links));
  // From here on only fatal and parse errors are reported (this hides notices and warnings).
  error_reporting(E_ERROR | E_PARSE);

  $titles = Array();
    if( strlen( $str )>0 ) {
  // preg_match_all() writes the matches into $titles (third argument) and returns
  // the number of matches; that count is then appended to the same variable.
  $titles[] = preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
  // NOTE(review): $title (singular) is never assigned inside this function, so this
  // returns null; $titles was probably intended.
  return $title;   
  // NOTE(review): unreachable -- the return above always exits first.
  return $links;
  } }


  // Will hold one array('link' => ..., 'title' => ...) entry per collected link.
  $newArray = array();


  // array_walk() invokes Titles once per element of $links and returns a boolean
  // success flag, not the callback's return values.
  // NOTE(review): $title is therefore true, which print_r() displays as 1 for every entry.
  $title = array_walk($links, 'Titles');
  foreach($links as $key => $val ){
$newArray[$key] = array( 'link' => $val, 'title' => $title);
 }
 print_r($newArray);

My result when var_dump is used:
  array(2) {



 [0]=>
  array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }



  [1]=>
  array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }

    }
   array(2) {
   [0]=>
     array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
       }


  [1]=>
 array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
  [1]=>
  array(6) {
   [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }

    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }
  [1]=>
   array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }
    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
   [1]=>
   array(6) {
  [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }
  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
    }

  [1]=>
  array(6) {
   [0]=>
   string(11) " The Qlick "
   [1]=>

 string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


   Array
   ( 
[0] => Array
    (
        [link] => http://www.theqlick.com/index.php
        [title] => 1
    )

[1] => Array
    (
        [link] => http://www.theqlick.com/qlickdates.php
        [title] => 1
    )

[2] => Array
    (
        [link] => http://www.theqlick.com/festivalfreaks.html
        [title] => 1
    )

[3] => Array
    (
        [link] => http://www.theqlick.com/2kcm.php
        [title] => 1
    )

[4] => Array
    (
        [link] => http://www.theqlick.com/index3.php
        [title] => 1
    )

[5] => Array
    (
        [link] => http://www.theqlick.com/index2.php
        [title] => 1
    )

       )

Answer 1

将函数的结尾部分改为：

// Extract the first <title> from the downloaded markup held in $str.
// preg_match() stops at the first hit and fills $match with
// [0] => the whole tag and [1] => the captured text; it returns 1 only when
// a title was actually found. (preg_match_all() always creates one sub-array
// per group, so counting its result cannot tell whether a title exists.)
// Modifiers: i accepts <TITLE>, s lets the title span several lines;
// the lazy .*? stops at the first closing tag.
if (is_string($str) && $str !== ''
    && preg_match('/<title[^>]*>(.*?)<\/title>/is', $str, $match) === 1) {
    // Titles are often padded with whitespace inside the tag; strip it.
    return trim($match[1]);
}

// No text to search, or no <title> element in it.
return '';

preg_match_all 的返回值是匹配的次数，匹配到的内容则写入第三个参数（这里是 $titles）；因此不能把它的返回值当作标题使用。上面的代码会返回找到的第一个匹配项；如果没有标题或没有可搜索的文本，则返回空字符串。

我没有测试过，所以可能需要调试。

编辑添加：

// Page whose anchors will be crawled; change this to scan a different site.
$URL = 'http://www.theqlick.com';
// Absolute URLs collected from that page (filled in further down).
$links = array();

/**
 * Fetch a page and return the text of its first <title> element.
 *
 * @param string $link URL or path that file_get_contents() can read.
 * @return string The title text with surrounding whitespace trimmed, or ''
 *                when the page could not be read, is empty, or contains no
 *                <title> element.
 */
function Titles($link) {
    $str = file_get_contents($link);
    // file_get_contents() returns false on failure; treat that like an empty page.
    if ($str === false || $str === '') {
        return '';
    }

    // preg_match() stops at the first hit and fills $match with
    // [0] => the whole tag and [1] => the captured text. preg_match_all()
    // is not used because it always creates one sub-array per group, so
    // counting its result cannot tell whether a title exists, and its
    // [1] entry is a list of captures rather than a single string.
    // Modifiers: i accepts <TITLE>, s lets the title span several lines;
    // the lazy .*? stops at the first closing tag.
    if (preg_match('/<title[^>]*>(.*?)<\/title>/is', $str, $match) === 1) {
        return trim($match[1]);
    }

    return '';
}

// Load the start page and gather the absolute URL of each anchor on it.
$file = file_get_html($URL);
foreach ($file->find('a') as $theelement) {
    $abs_url = url_to_absolute($URL, $theelement->href);
    if (empty($abs_url)) {
        // Nothing usable came back from url_to_absolute(); skip this anchor.
        continue;
    }
    $links[] = $abs_url;
}

// Pair every collected link with the title of the page it points to,
// fetching one page at a time.
$output = array();
foreach ($links as $thisLink) {
    $output[] = array(
        "link"  => $thisLink,
        "title" => Titles($thisLink),
    );
}

（同样，这段代码完全没有经过测试。）

先生成链接列表；然后逐个遍历该列表，为其中的每个链接获取页面标题。由于一次只处理一个链接，因此更容易跟踪当前处理到了哪一个。

我的数组返回1

1 个答案: