我需要一个函数,用来在 $get_webpage 变量中搜索,检查其中是否包含我的网站链接代码($linktext)。该函数应能在整个网页内容中查找 $linktext,但只有出现在 <body> 标记之后、</body> 标记之前的链接才算有效。
谢谢你的帮助。
[[更新]]大家好,补充说明一下:如果 example.com 网页上的链接代码带有 rel="nofollow",则不应视为有效,例如:
<a href="http://www.mysite.com/" rel="nofollow"><strong>My Site</strong></a>
// Exact anchor markup that is expected to appear on the remote page.
$linktext = '<a href="http://www.mysite.com/"><strong>My Site</strong></a>';

// Download the page that should carry the link; the body ends up in $get_webpage.
$cc = new cURL();
$get_webpage = $cc->get('http://www.example.com');
//####################################################################
//GET URL FUNCTION
//####################################################################
/**
 * Small wrapper around the cURL extension for GET and POST requests with
 * optional cookie persistence, response compression and proxy support.
 */
class cURL {
    /** HTTP header lines sent with every request. */
    public $headers = array();
    /** Value sent as the User-Agent header. */
    public $user_agent;
    /** Value passed to CURLOPT_ENCODING (e.g. 'gzip'). */
    public $compression;
    /** Path of the file used both to read and to store cookies. */
    public $cookie_file;
    /** Proxy passed to CURLOPT_PROXY; an empty string disables it. */
    public $proxy;
    /** Whether cookies are read from / written to $cookie_file. */
    public $cookies;

    /**
     * Set up default headers, user agent and transfer options.
     *
     * Declared as __construct() because a method named after the class is no
     * longer treated as a constructor by PHP 8, which would leave every
     * property unset after `new cURL()`.
     *
     * @param bool   $cookies     Enable the cookie file.
     * @param string $cookie      Path of the cookie file (created when missing).
     * @param string $compression Accepted content encoding.
     * @param string $proxy       Proxy address, or '' for a direct connection.
     */
    public function __construct($cookies=TRUE,$cookie='cookie.txt',$compression='gzip',$proxy='') {
        $this->headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
        $this->headers[] = 'Connection: Keep-Alive';
        $this->headers[] = 'Content-type: application/x-www-form-urlencoded;charset=UTF-8';
        $this->user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0)';
        $this->compression = $compression;
        $this->proxy = $proxy;
        $this->cookies = $cookies;
        if ($this->cookies == TRUE) {
            $this->cookie($cookie);
        }
    }

    /**
     * Remember the cookie file path, creating an empty file when it does not
     * exist yet. Calls error() (which ends the script) if it cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    public function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            // Keep the handle so it can be closed; closing the path string
            // instead leaks the handle and is a TypeError on PHP 8.
            $handle = fopen($cookie_file, 'w');
            if ($handle === false) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            fclose($handle);
        }
        $this->cookie_file = $cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url Address to fetch.
     * @return string|false Response body, or false when the transfer failed.
     */
    public function get($url) {
        $process = $this->prepare($url, 0);
        if ($process === false) {
            return false;
        }
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Perform a POST request.
     *
     * @param string       $url  Address to post to.
     * @param string|array $data Value handed to CURLOPT_POSTFIELDS.
     * @return string|false Response headers followed by the body, or false
     *                      when the transfer failed.
     */
    public function post($url,$data) {
        $process = $this->prepare($url, 1);
        if ($process === false) {
            return false;
        }
        curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($process, CURLOPT_POST, 1);
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Write the message to error.txt (overwriting earlier content) and stop
     * the script.
     *
     * @param string $error Description of the failure.
     */
    public function error($error) {
        $fp = fopen("error.txt","w") or die ();
        $error_text="cURL Error:$error\n";
        fputs($fp,$error_text);
        fclose($fp) or die ();
        die;
    }

    /**
     * Create a cURL handle carrying the options shared by get() and post().
     *
     * @param string $url            Address of the request.
     * @param int    $include_header 1 to include response headers in the
     *                               returned string, 0 to return the body only.
     * @return resource|object|false Configured handle, or false if curl_init() failed.
     */
    private function prepare($url, $include_header) {
        $process = curl_init($url);
        if ($process === false) {
            return false;
        }
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_HEADER, $include_header);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) {
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($process, CURLOPT_MAXREDIRS, 2);
        return $process;
    }
}
//######################################################################
//END URL FUNCTION
//#######################################################################
答案 0 :(得分:1)
您可以使用 PHP 的 DOM 处理函数:
// Parse the fetched page and look at every anchor located inside <body>.
$dom = new DOMDocument();

// Collect libxml parse warnings internally instead of muting all errors with "@".
$previous_libxml_setting = libxml_use_internal_errors(true);
// loadHTML() refuses empty input, and a failed download may not yield a
// string at all, so only parse when there is actual markup.
if (isset($html) && is_string($html) && $html !== '') {
    $dom->loadHTML($html);
}
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

$x = new DOMXPath($dom);
foreach ($x->query("//body//a") as $node)
{
    // rel may hold several space-separated tokens; a nofollow link does not count.
    $rel_tokens = preg_split('/\s+/', strtolower(trim($node->getAttribute("rel"))));
    if (in_array("nofollow", $rel_tokens, true))
    {
        continue;
    }
    if ($node->getAttribute("href") === "http://mysite.com")
    {
        // we got the link via href
    }
    if ($node->textContent === "http://mysite.com")
    {
        // we got the link via text
    }
}
答案 1 :(得分:0)
据我所知,有 4 种方法可以做到这一点。
我推荐前两种,其中 DOM 可能比 XML 更合适。可以参考拜伦的示例,它应该能解决问题。
答案 2 :(得分:0)
下面的函数完全用 XPath 来完成判断,前提是您要求文字 My Site 必须位于 <strong> 标记内:
/**
 * Report whether the HTML contains an <a> element whose href equals $href and
 * which has a direct <strong> child whose text contains $text.
 *
 * The comparison is done in PHP rather than by interpolating the values into
 * an XPath string, so quotes inside $href or $text cannot break the query.
 *
 * @param string $html Markup to search.
 * @param string $href Exact href value the link must have.
 * @param string $text Text that must occur inside the <strong> element.
 * @return bool True when at least one matching link exists, false otherwise
 *              (including for empty or unparsable input).
 */
function findLinks($html, $href, $text)
{
    // loadHTML() refuses empty input, so bail out before parsing.
    if (!is_string($html) || trim($html) === '')
    {
        return false;
    }

    $dom = new DOMDocument();
    // Real-world pages are rarely valid; keep parser warnings internal.
    $previous = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded)
    {
        return false;
    }

    $href = (string) $href;
    $text = (string) $text;
    $xpath = new DOMXPath($dom);
    foreach ($xpath->query('//a/strong') as $strong)
    {
        if ($strong->parentNode->getAttribute('href') !== $href)
        {
            continue;
        }
        // An empty $text matches any content, like XPath contains(., '').
        if ($text === '' || strpos($strong->textContent, $text) !== false)
        {
            return true;
        }
    }
    return false;
}
如果您不在意 <strong> 标记,可以改用如下 XPath:
//a[@href='$href'][contains(., '$text')]
建议多了解一下 XPath,看看它还能做些什么。您也可以像另一位回答者建议的那样,先用一个简单的 XPath 取出所有 <a> 标签,再遍历它们,逐一检查是否满足您的条件。
答案 3 :(得分:0)
我还不知道锚点(<a>)可以出现在 <body> 标签之外 :)
首先用 preg_match 提取 <body> 标签内部的 HTML;然后,如果您确切知道链接在 HTML 中的写法,直接用普通的 strpos 搜索即可。