$content = preg_replace(
'#(<div class\=\"removethis\">(^.*)</div>)#is',
答案 0 :(得分:3)
答案 1 :(得分:2)
下面的脚本会去掉带有指定类（removethis）的 DIV 的开始和结束标记，并保留其内容。
<?php // test.php Rev:20111219_1600
// Remove DIV start and end tags having this class attribute:
$class_to_remove = "removethis";

// Recursive regex matches an outermost DIV element and its contents.
// Capture groups:
//   $1 = the DIV start tag without its leading "<" (e.g. `div class="x">`),
//   $2 = the raw attribute string of that start tag (may be empty),
//   $3 = the element contents, which may themselves contain nested DIVs.
// Modifiers: "x" is required for the free-spacing layout and the inline
// "#" comments below; "i" lets the pattern match <DIV> as well as <div>.
$re = '% # Match outermost DIV element.
    <                    # Start of HTML start tag
    (                    # $1: DIV element start tag.
      div                # Tag name = DIV
      (                  # $2: DIV start tag attributes.
        (?:              # Group for zero or more attributes.
          \s+            # Required whitespace precedes attrib.
          [\w.\-:]+      # Attribute name.
          (?:            # Group for optional attribute value.
            \s*=\s*      # Name and value separated by =
            (?:          # Group for value alternatives.
              \'[^\']*\' # Either single quoted,
            | "[^"]*"    # or double quoted,
            | [\w.\-:]+  # or unquoted value.
            )            # End group of value alternatives.
          )?             # Attribute value is optional.
        )*               # Zero or more attributes.
      )                  # End $2: DIV start tag attributes.
      \s*                # Optional whitespace before closing >.
      >                  # End DIV element start tag.
    )                    # End $1: DIV element start tag.
    (                    # $3: DIV element contents.
      (?:                # Group for zero or more content alts.
        (?R)             # Either a nested DIV element.
      |                  # or non-DIV tag stuff.
        [^<]*            # {normal*} Non-< start of tag stuff.
        (?:              # Begin "unrolling-the-loop".
          <              # {special} A "<", but only if it is
          (?!/?div\b)    # NOT start of a <div or </div
          [^<]*          # more {normal*} Non-< start of tag.
        )*               # End {(special normal*)*} construct.
      )*                 # Zero or more content alternatives.
    )                    # End $3: DIV element contents.
    </div\s*>            # DIV element end tag.
    %ix';
/**
 * Remove matching start and end tags of DIV elements having a specific class.
 *
 * Every outermost DIV matched by the global $re is handed to
 * _stripSpecialDivTags_cb(), which either drops the element's tags (keeping
 * its contents) or re-emits the element with its tags temporarily marked by a
 * NUL byte. Those markers are removed again before returning.
 *
 * @param string $text HTML text to process.
 * @return string The processed text; the input is returned unchanged if PCRE
 *                reports a failure.
 */
function stripSpecialDivTags($text) {
    global $re;

    $result = preg_replace_callback($re, '_stripSpecialDivTags_cb', $text);
    if ($result === null) {
        // preg_replace_callback() returns null on a PCRE error; passing that
        // on to str_replace() would silently turn the whole document into ''.
        trigger_error(
            'stripSpecialDivTags: PCRE error code ' . preg_last_error(),
            E_USER_WARNING
        );
        return $text;
    }

    // Undo the temporary "<\0" markers the callback put on retained DIV tags.
    return str_replace("<\0", '<', $result);
}
/**
 * preg_replace_callback() handler for one DIV element matched by the global $re.
 *
 * Nested DIVs inside the element's contents are processed first (recursively,
 * through this same callback). The element's own tags are then dropped when
 * its class attribute contains $class_to_remove as a whole whitespace-separated
 * token; otherwise the element is re-emitted with "<\0" in place of each tag's
 * "<", which the caller is expected to convert back to "<".
 *
 * @param array $matches Match groups from $re: [1] start tag without the
 *                       leading "<", [2] attribute string, [3] contents.
 * @return string Replacement text for the matched element.
 */
function _stripSpecialDivTags_cb($matches) {
    global $re, $class_to_remove;

    // Handle nested DIVs. A subject without any match comes back unchanged,
    // so no separate preg_match() pre-scan is needed. On a PCRE error (null)
    // the contents are kept as they were.
    $inner = preg_replace_callback($re, '_stripSpecialDivTags_cb', $matches[3]);
    if ($inner !== null) {
        $matches[3] = $inner;
    }

    // Regex to match class attribute and capture value in $1.
    $re_class = '/ ^         # Anchor to start of attributes string.
        (?:                  # Zero or more non-class attributes.
          \s+                # Required whitespace precedes attrib.
          (?!class\b)        # Match any attribute other than "CLASS".
          [\w.\-:]+          # Attribute name.
          (?:                # Group for optional attribute value.
            \s*=\s*          # Name and value separated by =.
            (?:              # Group for value alternatives.
              \'[^\']*\'     # Either single quoted,
            | "[^"]*"        # or double quoted,
            | [\w.\-:]+      # or unquoted value.
            )                # End group of value alternatives.
          )?                 # Attribute value is optional.
        )*                   # Zero or more non-class attributes.
        \s+                  # Required whitespace precedes attrib.
        class\s*=\s*         # "CLASS" is the attribute we need.
        (?|                  # Use branch reset to capture value in $1.
          \'([^\']*)\'       # Either $1.1: a single quoted,
        | "([^"]*)"          # or $1.2: a double quoted,
        | ([\w.\-:]+)        # or $1.3: an un-quoted value.
        )                    # End branch reset to capture value in $1.
        /ix';

    // Matches the target class only as a complete token, i.e. delimited by
    // whitespace or the ends of the class value ("dontremovethisone" is safe).
    $re_remove = '%(?<=^|\s)'.preg_quote($class_to_remove, '%').'(?=\s|$)%';

    if (preg_match($re_class, $matches[2], $m)) {   // If DIV has a CLASS,
        if (preg_match($re_remove, $m[1])) {        // AND it has special value,
            return $matches[3];                     // Then strip start and end DIV tags.
        }
    }

    // Hide the start and end tags by inserting a temporary null char.
    return "<\0". $matches[1] . $matches[3] . "<\0/div>";
}
// Driver: read the sample document, strip the targeted DIV tags, and write
// the result next to it.
$data = file_get_contents('testdata.html');
if ($data === false) {
    // Without this check a failed read would be processed as an empty string
    // and silently produce an empty output file.
    throw new RuntimeException('Unable to read testdata.html');
}
$output = stripSpecialDivTags($data);
if (file_put_contents('testdata_out.html', $output) === false) {
    throw new RuntimeException('Unable to write testdata_out.html');
}
<div class="do not remove">
<div class=removethis>
<div class='do removethis one too'>
<div class="dontremovethisone">
<div class="do not remove">
<div class="dontremovethisone">
答案 2 :(得分:1)
// strip_tags() removes every tag except those listed in its second argument,
// leaving the text content in place; here only <b> elements survive.
$allowed_tags = '<b>';
$html = '<div class="foo">Hello world. <b>I am bold!</b></div>';
$text = strip_tags($html, $allowed_tags);
print $text; // Output: Hello world. <b>I am bold!</b>