我正在使用CURL
从某些网页获取内容。我需要从内容中提取媒体标签。
有没有可用的图书馆?或者任何想法都会很棒。
答案 0 :(得分:1)
这会有帮助吗?
function file_get_contents_curl($url)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
$html = file_get_contents_curl("http://example.com/");
//parsing begins here:
$doc = new DOMDocument();
@$doc->loadHTML($html);
$nodes = $doc->getElementsByTagName('title');
//get and display what you need:
$title = $nodes->item(0)->nodeValue;
$metas = $doc->getElementsByTagName('meta');
for ($i = 0; $i < $metas->length; $i++)
{
$meta = $metas->item($i);
if($meta->getAttribute('name') == 'description')
$description = $meta->getAttribute('content');
if($meta->getAttribute('name') == 'keywords')
$keywords = $meta->getAttribute('content');
}
echo "Title: $title". '<br/><br/>';
echo "Description: $description". '<br/><br/>';
echo "Keywords: $keywords";
或者如果您需要保存图片..
$remote_img = 'http://www.example.com/images/image.jpg ';
$img = imagecreatefromjpeg($remote_img);
$path = 'images/';
imagejpeg($img, $path);
function save_image($img,$fullpath){
$ch = curl_init ($img);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_BINARYTRANSFER,1);
$rawdata=curl_exec($ch);
curl_close ($ch);
if(file_exists($fullpath)){
unlink($fullpath);
}
$fp = fopen($fullpath,'x');
fwrite($fp, $rawdata);
fclose($fp);
}