我想用 PHP 加载一个 URL,并把页面中的所有图像输出(echo)出来。下面是我使用的代码,但运行时出现了错误:
错误:
Call to a member function getElementsByTagName() on string
如何让这些代码打印出图像和/或其他元素,如H1或P?这是我的代码:
代码:
<?php
// Page whose images should be listed.
$to_crawl = "http://thechive.com/";
/**
 * Tries to print every <img> element of the page at $url.
 *
 * This is the failing code from the question, kept exactly as posted.
 *
 * @param string $url Address of the page to fetch.
 */
function get_links($url){
// $input holds the page body as a plain string, not a DOM object.
$input = file_get_contents($url);
// This line raises "Call to a member function getElementsByTagName() on string":
// the markup has to be parsed into a DOMDocument before it can be queried.
$images = $input->getElementsByTagName('img');
echo $images;
}
get_links($to_crawl);
?>
答案 0 :(得分:1)
试试这个
<?php
$to_crawl = "http://thechive.com/";

/**
 * Collects every <img> element of an HTML document, prints each image URL
 * followed by "<br>", and returns the collected entries.
 *
 * @param string $to_crawl URL (or local path) of the HTML document to load.
 * @return array List of array('url' => src attribute, 'text' => alt attribute),
 *               one entry per <img>; empty when the document cannot be loaded.
 */
function get_links($to_crawl) {
    // Real-world markup is rarely valid: collect libxml warnings internally
    // instead of printing one PHP warning per malformed tag.
    $previous = libxml_use_internal_errors(true);

    // Create a new DOM Document and load the page into it
    $the_html = new DOMDocument();
    $loaded = $the_html->loadHTMLFile($to_crawl);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    // Empty array to hold all images to return
    $links = array();
    if ($loaded === false) {
        return $links;
    }

    // Images carry their address in "src" (not "href") and have no text
    // content, so "alt" is the only textual description available.
    foreach ($the_html->getElementsByTagName('img') as $img) {
        $links[] = array(
            'url'  => $img->getAttribute('src'),
            'text' => $img->getAttribute('alt'),
        );
    }

    // Echo each URL; it comes from a third-party page, so escape it for HTML.
    foreach ($links as $link) {
        echo htmlspecialchars($link['url'], ENT_QUOTES, 'UTF-8') . "<br>";
    }

    // Return only after every link has been printed
    return $links;
}

get_links($to_crawl);
?>
答案 1 :(得分:1)
在调用 getElementsByTagName 方法之前,您需要先创建一个新的 DOMDocument 对象,并把 HTML 加载进去。
这是一个简单的例子:
<?php
// Collect libxml parse warnings internally instead of printing one per malformed tag.
libxml_use_internal_errors(true);

// Download the raw markup; file_get_contents() yields false when the request fails.
$html = file_get_contents("http://thechive.com/");
if ($html === false || $html === '') {
    // Nothing to parse: loadHTML() rejects an empty source (ValueError on PHP 8).
    exit("Unable to download the page.");
}

// Parse the markup into a queryable DOM tree.
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();

// Print the src of every <img>, one per line so the URLs do not run together.
foreach ($dom->getElementsByTagName('img') as $image) {
    echo $image->getAttribute('src'), PHP_EOL;
}
答案 2 :(得分:1)
<?php
/* Utility class to simplify getting dom object from html */
/**
 * Thin wrapper that parses an HTML string into a DOMDocument.
 */
class htmldom{
    /** @var DOMDocument Parsed document; stays empty when no markup was supplied. */
    private $html;

    /**
     * Configures a lenient DOMDocument and loads $data into it.
     *
     * libxml internal error handling is switched on (and left on) so malformed
     * markup does not emit PHP warnings.
     *
     * @param string|false $data    HTML source; false or an empty string yields an empty document.
     * @param bool         $convert When true, $data is passed through
     *                              mb_convert_encoding($data, 'utf-8') before parsing.
     */
    public function __construct( $data=false, $convert=true ){
        libxml_use_internal_errors( true );

        /* Always build the document so gethtml() never hands back null */
        $this->html = new DOMDocument('1.0','utf-8');
        $this->html->validateOnParse=false;
        $this->html->standalone=true;
        $this->html->preserveWhiteSpace=true;
        $this->html->strictErrorChecking=false;
        $this->html->substituteEntities=false;
        $this->html->recover=true;
        $this->html->formatOutput=true;

        /* A constructor cannot return a value; with no markup just keep the empty document */
        if( !$data ) return;

        $this->html->loadHTML( $convert ? mb_convert_encoding( $data, 'utf-8' ) : $data );

        /* Parse errors were only being collected; discard them */
        libxml_clear_errors();
    }

    /**
     * @return DOMDocument The parsed (possibly empty) document.
     */
    public function gethtml(){
        return $this->html;
    }
}
$url = "http://thechive.com/";
/* store results here */
$data=array();
/* The tags you are interested in finding within the html src */
$tags=array('img','p','h1');

/* Download the page; without any markup there is nothing to parse, so stop early */
$source=file_get_contents( $url );
if( $source===false || $source==='' ) exit( 'Unable to download '.$url );

/* Create the dom object with html from url */
$dom=new htmldom( $source, true );
$html=$dom->gethtml();

/* Walk every element and keep only the tags we are interested in */
foreach( $html->getElementsByTagName('*') as $tag ) {
    if( !in_array( $tag->tagName, $tags, true ) ) continue;

    /* Images are recorded by their src; other tags by name and text content */
    if( $tag->tagName==='img' ) $data[]=$tag->getAttribute('src');
    else $data[]=array( 'tag' => $tag->tagName, 'value' => $tag->nodeValue );
}

/* Release the document */
$dom=$html=null;

/* Do stuff with the results - the text comes from a third-party page, so escape it before printing */
echo '<pre>',htmlspecialchars( print_r($data,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ),'</pre>';
?>
答案 3 :(得分:0)
<?php
# Download the page source
$html = file_get_contents("http://thechive.com/");
if ($html === false || $html === '') {
    # Nothing to parse: loadHTML() rejects an empty source (ValueError on PHP 8)
    exit("Unable to download the page.");
}
# Create a DOM parser object
$dom = new DOMDocument();
# Parse the downloaded HTML.
# Real-world pages are rarely valid, so let libxml collect its parse
# warnings internally rather than hiding every error with the @ operator.
libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
# Iterate over all the <img> tags
foreach ($dom->getElementsByTagName('img') as $image) {
    # Show the image URL, escaped because it comes from a third-party page
    echo htmlspecialchars($image->getAttribute('src'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo "<br />";
}
?>