我在这里使用 Simple HTML DOM 从页面源代码中获取数据,然后根据我的需要对其进行过滤。
//including script
// Property access needs `->`; `$config>root` would compare $config with the constant `root`.
include($config->root.'/script/vendor/simple_html_dom/simple_html_dom.php');

//getting all data
// An explicit scheme is required: without it PHP's stream layer treats the value as a local file path.
$url = "http://www.example.com";
// Arguments are passed positionally; the trailing comments name the library parameter each one fills.
$html = file_get_html(
    $url,
    false,                  // $use_include_path
    null,                   // $context
    -1,                     // $offset
    -1,                     // $maxLen
    true,                   // $lowercase
    true,                   // $forceTagsClosed
    DEFAULT_TARGET_CHARSET, // $target_charset
    false,                  // $stripRN
    DEFAULT_BR_TEXT,        // $defaultBRText (the original passed DEFAULT_SPAN_TEXT here)
    DEFAULT_SPAN_TEXT       // $defaultSpanText
);
// Stop early with a clear message instead of failing later on `$html->find()`.
if (!$html) {
    throw new RuntimeException("Unable to load HTML from {$url}");
}

//title
// find(..., 0) yields null when nothing matches, so guard before reading innertext.
$titleNode = $html->find('#question-header h1 a', 0);
$title = $titleNode ? $titleNode->innertext : null;

//comments
// Initialise the list so it is a defined (empty) array even when no comment matches.
$question_comments = [];
foreach ($html->find('#question .comment-body') as $element) {
    $question_comments[] = $element->innertext;
}

//{running a lot of loops like above}
//{then the final result ends up inside $output}
// NOTE(review): ob_get_clean() needs an active output buffer; the elided code above
// is assumed to have called ob_start() - confirm.
$output = ob_get_clean();
// TODO: add an attribute to all anchor tags inside a particular div here (the open question).
echo $output;
?>
例如,下面是我的 $output 变量的输入内容:
$output = "
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='UTF-8'>
<title>Example</title>
</head>
<body>
<div class='container'>
<a href='/link-1'></a>
<div class='data'>
<a href='/link-2'></a>
<a href='/link-3'></a>
<a href='/link-4'></a>
</div>
</div>
<div class='footer'>
<a href='/new-link'></a>
</div>
</body>
</html>
";
我想为 container 这个 div 内的所有锚标签添加属性 rel='no-follow'。
答案 0 :(得分:0)
试试这个。希望这个会有所帮助。
如果您希望此解决方案也适用于嵌套更深的标记,可以取消代码中相关行的注释。
<?php
// Demo script: parse a sample HTML page and tag every anchor inside the
// `container` div with rel="no-follow". Leaves $domDocument and $domXPath
// defined for the code that follows.
ini_set('display_errors', 1);
$string = <<<HTML
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='UTF-8'>
<title>Example</title>
</head>
<body>
<div class='container'>
<a href='/link-1'></a>
<div class='data'>
<a href='/link-2'></a>
<a href='/link-3'></a>
<a href='/link-4'></a>
</div>
</div>
<div class='footer'>
<a href='/new-link'></a>
</div>
</body>
</html>
HTML;

$domDocument = new DOMDocument();
// Collect libxml parse warnings internally: display_errors is on, so they would
// otherwise be printed into the generated page. The previous setting is restored.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$domDocument->loadHTML($string);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$domXPath = new DOMXPath($domDocument);
// `//a` walks the descendant axis, so anchors at any nesting depth are matched.
// The class test matches `container` as one token of a multi-class attribute
// rather than requiring the attribute to equal it exactly.
$results = $domXPath->query(
    "//div[contains(concat(' ', normalize-space(@class), ' '), ' container ')]//a"
);
foreach ($results as $result) {
    // NOTE(review): the value is kept as requested in the question; the standard
    // HTML link-type keyword is spelled `nofollow`.
    $result->setAttribute("rel", "no-follow");
}
/**
 * Sets rel="no-follow" on every <a> element nested below the given node.
 *
 * Anchors at any depth are covered, which is what the original's commented-out
 * recursion was meant to achieve. The node itself is never modified, and no
 * global XPath object is needed.
 *
 * @param DOMElement|DOMDocument|mixed $result Node whose descendant anchors are tagged;
 *                                             any other value is ignored.
 * @return void
 */
function addAttribute($result)
{
    // Only element and document nodes expose getElementsByTagName().
    if (!($result instanceof DOMElement) && !($result instanceof DOMDocument)) {
        return;
    }

    // getElementsByTagName() returns all descendants with that tag name, not just children.
    foreach ($result->getElementsByTagName('a') as $anchor) {
        $anchor->setAttribute("rel", "no-follow");
    }
}
// Serialize the modified DOM tree back to HTML markup and write it to the output.
print $domDocument->saveHTML();