PHP:按标签名获取元素时报错

时间:2015-10-11 10:46:53

标签: php html

我想用PHP加载一个URL,并输出该页面中的所有图像。下面是我使用的代码,但运行时出现错误:

错误:

Call to a member function getElementsByTagName() on string

如何让这些代码打印出图像和/或其他元素,如H1或P?这是我的代码:

代码:

<?php

// Page whose images should be listed.
$to_crawl = "http://thechive.com/";

/**
 * Attempts to print every <img> element found at $url.
 *
 * This is the failing version from the question; the code is left unchanged
 * so that it still reproduces the reported error.
 */
function get_links($url){
    // file_get_contents() hands back the page as a plain string
    // (or false when the request fails), not a DOM object.
    $input = file_get_contents($url);
    // Fails here: a string has no methods, hence
    // "Call to a member function getElementsByTagName() on string".
    $images = $input->getElementsByTagName('img');
    echo $images;
}

get_links($to_crawl);

?>

4 个答案:

答案 0 :(得分:1)

试试这个:

<?php

$to_crawl = "http://thechive.com/";


/**
 * Collects every <img> element of an HTML document, prints each image URL
 * followed by "<br>", and returns the collected entries.
 *
 * @param string $to_crawl URL or local path of the HTML document to load.
 * @return array One array('url' => ..., 'text' => ...) entry per <img>:
 *               'url' is the src attribute and 'text' the alt attribute.
 *               Empty when the document cannot be loaded.
 */
function get_links($to_crawl) {

    // Real-world markup is rarely valid: collect libxml's parse warnings
    // internally instead of printing them, then restore the caller's setting.
    $previous = libxml_use_internal_errors(true);

    // Create a new DOM Document and load the page into it
    $the_html = new DOMDocument();
    $loaded = $the_html->loadHTMLFile($to_crawl);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    // Empty array to hold all images to return
    $links = array();

    if ($loaded === false) {
        return $links;
    }

    // An <img> carries its address in "src" (not "href") and has no text
    // content, so the alt attribute is the closest thing to a label.
    foreach ($the_html->getElementsByTagName('img') as $image) {
        $links[] = array(
            'url'  => $image->getAttribute('src'),
            'text' => $image->getAttribute('alt'),
        );
    }

    // Echo the URLs; they come from a remote page, so escape them for HTML.
    foreach ($links as $link) {
        echo htmlspecialchars($link['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br>";
    }

    // Return only after every link has been printed
    return $links;
}

get_links($to_crawl);

?>

答案 1 :(得分:1)

在访问getElementsByTagName方法之前,您需要创建一个新的DomDocument()对象。 这是一个简单的例子:

<?php
// Keep libxml's warnings about invalid markup out of the output.
libxml_use_internal_errors(true);

$html = file_get_contents("http://thechive.com/");

if ($html === false || $html === '') {
    // loadHTML() cannot parse an empty document, so stop before calling it.
    echo "Unable to fetch the page";
} else {
    $dom = new DOMDocument();
    $dom->loadHTML($html);

    // Print one image URL per line; the values come from a remote page,
    // so escape them before writing them into HTML output.
    foreach ($dom->getElementsByTagName('img') as $image) {
        echo htmlspecialchars($image->getAttribute('src'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), "<br />\n";
    }
}

答案 2 :(得分:1)

<?php
/**
 * Utility class to simplify getting a DOM object from an HTML string.
 */
class htmldom{
    /** @var DOMDocument|null Parsed document; stays null when no markup was supplied. */
    private $html;

    /**
     * Parses $data into a DOMDocument.
     *
     * @param string|false $data    HTML markup. Any falsy value (for example the
     *                              false returned by a failed file_get_contents())
     *                              leaves the object without a document.
     * @param bool         $convert When true, $data is treated as UTF-8 and its
     *                              non-ASCII characters are rewritten as numeric
     *                              entities so the parser cannot mis-decode them.
     */
    public function __construct( $data=false, $convert=true ){
        if( !$data ) return;

        /* Collect parse warnings internally; the caller's setting is restored below */
        $previous = libxml_use_internal_errors( true );

        $this->html = new DOMDocument('1.0','utf-8');
        $this->html->validateOnParse=false;
        $this->html->standalone=true;
        $this->html->preserveWhiteSpace=true;
        $this->html->strictErrorChecking=false;
        $this->html->substituteEntities=false;
        $this->html->recover=true;
        $this->html->formatOutput=true;

        /*
            loadHTML() does not assume UTF-8 when the markup declares no charset,
            so multi-byte characters would come out garbled. Numeric entities are
            pure ASCII and are decoded correctly whatever charset is assumed.
        */
        $this->html->loadHTML(
            $convert
                ? mb_encode_numericentity( $data, array( 0x80, 0x10FFFF, 0, 0x1FFFFF ), 'UTF-8' )
                : $data
        );

        libxml_clear_errors();
        libxml_use_internal_errors( $previous );
    }

    /**
     * @return DOMDocument|null The parsed document, or null when the
     *                          constructor received no markup.
     */
    public function gethtml(){
        return $this->html;
    }
}



$url = "http://thechive.com/";
/* store results here */
$data=array();

/* The tags you are interested in finding within the html src */
$tags=array('img','p','h1');


/* Create the dom object with html from url */
$dom=new htmldom( file_get_contents( $url ), true );
$html=$dom->gethtml();

/* gethtml() yields no document when the download failed or returned nothing */
if( $html instanceof DOMDocument ){
    /* Walk every element and keep only the tags we are interested in */
    foreach( $html->getElementsByTagName('*') as $tag ) {
        if( !in_array( $tag->tagName, $tags, true ) ) continue;

        /* Images are stored as their src; other tags as a tag/value pair */
        if( $tag->tagName==='img' ) $data[]=$tag->getAttribute('src');
        else $data[]=array( 'tag' => $tag->tagName, 'value' => $tag->nodeValue );
    }
}
$dom=$html=null;
/* Do stuff with the results - escaped, because the text comes from a remote page */
echo '<pre>',htmlspecialchars( print_r($data,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ),'</pre>';
?>

答案 3 :(得分:0)

<?php

# Download the page markup
$html = file_get_contents("http://thechive.com/");

if ($html === false || $html === '') {
        # loadHTML cannot parse an empty document, so stop before calling it
        echo "Unable to fetch the page";
} else {
        # Create a DOM parser object
        $dom = new DOMDocument();

        # Parse the downloaded HTML.
        # Invalid markup makes libxml emit warnings; collect them internally
        # instead of hiding every error with @, then restore the old setting.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        # Iterate over all the <img> tags
        foreach($dom->getElementsByTagName('img') as $image) {
                # Show the image address (escaped: it comes from a remote page)
                echo htmlspecialchars($image->getAttribute('src'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                echo "<br />";
        }
}
?>