我当前的代码
$this->data = $this->result->RetrieveDocumentResult;
$this->dom = new DOMDocument();
$this->dom->strictErrorChecking = false;
$this->dom->formatOutput = true;
$this->dom->loadHTML(base64_decode($this->data));
$exceptions = array(
'a' => array('href'),
'img' => array('src')
);
$this->stripAttributes($exceptions);
$this->stripSpanTags();
file_put_contents('Recode/' . $flname . '.html', base64_decode($this->data));
}
/**
 * Remove attributes from every element of the current DOM document,
 * keeping only the tag/attribute pairs that checkAttrExceptions() accepts.
 *
 * Afterwards the document is serialised again and stored, base64-encoded,
 * in $this->data.
 *
 * @param array $exceptions Map of tag name => list of attribute names to keep
 *
 * @return void
 */
public function stripAttributes(array $exceptions)
{
    $finder = new DOMXPath($this->dom);
    $matched = $finder->query("//*");
    if ($matched === false) {
        die('Xpath error!');
    }
    /** @var $node DOMElement */
    foreach ($matched as $node) {
        // Walk the attribute list from the end so that removing an entry
        // does not shift the indexes that are still to be visited.
        $index = $node->attributes->length;
        while (--$index >= 0) {
            // checkAttrExceptions() reads the pair from these two properties.
            $this->tag = $node->nodeName;
            $this->attribute = $node->attributes->item($index)->nodeName;
            if (!$this->checkAttrExceptions($exceptions)) {
                $node->removeAttribute($this->attribute);
            }
        }
    }
    $serialised = $this->dom->saveHTML();
    $this->data = base64_encode($serialised);
}
/**
 * Tell whether the tag/attribute pair currently held in $this->tag and
 * $this->attribute is listed in the given exception map.
 *
 * Entries are examined in order; the script is terminated with
 * 'Attributes not set!' as soon as an entry is reached whose value is
 * empty or not an array.
 *
 * @param array $exceptions Map of tag name => list of attribute names to keep
 *
 * @return bool True when the current pair is listed, false otherwise
 */
public function checkAttrExceptions(array $exceptions)
{
    foreach ($exceptions as $tagName => $allowed) {
        if (!is_array($allowed) || count($allowed) === 0) {
            die('Attributes not set!');
        }
        // Strict comparison on both the tag and the attribute name.
        if ($tagName === $this->tag && in_array($this->attribute, $allowed, true)) {
            return true;
        }
    }
    return false;
}
/**
 * Strip SPAN tags from the current DOM document.
 *
 * Every <span> is unwrapped: its child nodes are moved, in their original
 * order, to the position the span occupied, and the emptied span is removed.
 * Afterwards the document is serialised again and stored, base64-encoded,
 * in $this->data.
 *
 * @return void
 */
protected function stripSpanTags ()
{
    // getElementsByTagName() returns a live list, so item(0) is always the
    // first span that is still left in the document.
    $spans = $this->dom->getElementsByTagName('span');
    while ($span = $spans->item(0)) {
        $parent = $span->parentNode;
        // Move the children directly in front of the span rather than going
        // through a DOMDocumentFragment; a span without children then needs
        // no special handling and the loop always makes progress.
        while ($child = $span->firstChild) {
            $parent->insertBefore($child, $span);
        }
        $parent->removeChild($span);
    }
    $this->data = base64_encode($this->dom->saveHTML());
}
}
我想要删除HTML中的所有 `&nbsp;`,可以执行以下操作:
$html = str_replace('&nbsp;', '', $html);
但我不清楚应该如何、在何处把它加入上面的第一段代码中,请帮帮我。
此外,这一改动不应影响第一段代码中已有的标签过滤逻辑。
答案 0 :(得分:0)
已找到解决方案,以下代码可以使用:
$this->result = $this->soap->RetrieveDocument(
array('format' => 'html')
);
$this->data = $this->result->RetrieveDocumentResult;
$this->dom = new DOMDocument();
$this->dom->strictErrorChecking = false;
$this->dom->formatOutput = true;
$this->dom->loadHTML(base64_decode($this->data));
$exceptions = array(
'a' => array('href'),
'img' => array('src')
);
$this->stripAttributes($exceptions);
$this->stripSpanTags();
$decoded = base64_decode($this->data);
$decoded = $this->stripNonBreakingSpaces($decoded);
file_put_contents('Recode/' . $flname . '.html', $decoded);
}
/**
 * Remove attributes from every element of the current DOM document,
 * keeping only the tag/attribute pairs that checkAttrExceptions() accepts.
 *
 * Afterwards the document is serialised again and stored, base64-encoded,
 * in $this->data.
 *
 * @param array $exceptions Map of tag name => list of attribute names to keep
 *
 * @return void
 */
public function stripAttributes(array $exceptions)
{
    $finder = new DOMXPath($this->dom);
    $matched = $finder->query("//*");
    if ($matched === false) {
        die('Xpath error!');
    }
    /** @var $node DOMElement */
    foreach ($matched as $node) {
        // Walk the attribute list from the end so that removing an entry
        // does not shift the indexes that are still to be visited.
        $index = $node->attributes->length;
        while (--$index >= 0) {
            // checkAttrExceptions() reads the pair from these two properties.
            $this->tag = $node->nodeName;
            $this->attribute = $node->attributes->item($index)->nodeName;
            if (!$this->checkAttrExceptions($exceptions)) {
                $node->removeAttribute($this->attribute);
            }
        }
    }
    $serialised = $this->dom->saveHTML();
    $this->data = base64_encode($serialised);
}
/**
 * Tell whether the tag/attribute pair currently held in $this->tag and
 * $this->attribute is listed in the given exception map.
 *
 * Entries are examined in order; the script is terminated with
 * 'Attributes not set!' as soon as an entry is reached whose value is
 * empty or not an array.
 *
 * @param array $exceptions Map of tag name => list of attribute names to keep
 *
 * @return bool True when the current pair is listed, false otherwise
 */
public function checkAttrExceptions(array $exceptions)
{
    foreach ($exceptions as $tagName => $allowed) {
        if (!is_array($allowed) || count($allowed) === 0) {
            die('Attributes not set!');
        }
        // Strict comparison on both the tag and the attribute name.
        if ($tagName === $this->tag && in_array($this->attribute, $allowed, true)) {
            return true;
        }
    }
    return false;
}
/**
 * Strip SPAN tags from the current DOM document.
 *
 * Every <span> is unwrapped: its child nodes are moved, in their original
 * order, to the position the span occupied, and the emptied span is removed.
 * Afterwards the document is serialised again and stored, base64-encoded,
 * in $this->data.
 *
 * @return void
 */
protected function stripSpanTags ()
{
    // getElementsByTagName() returns a live list, so item(0) is always the
    // first span that is still left in the document.
    $spans = $this->dom->getElementsByTagName('span');
    while ($span = $spans->item(0)) {
        $parent = $span->parentNode;
        // Move the children directly in front of the span rather than going
        // through a DOMDocumentFragment; a span without children then needs
        // no special handling and the loop always makes progress.
        while ($child = $span->firstChild) {
            $parent->insertBefore($child, $span);
        }
        $parent->removeChild($span);
    }
    $this->data = base64_encode($this->dom->saveHTML());
}
/**
 * Replace every non-breaking space within a string with a regular space.
 *
 * Covers the named entity (&nbsp;), the decimal and hexadecimal numeric
 * entities (&#160;, &#xA0;) and the raw UTF-8 encoded character (U+00A0),
 * because serialised HTML may contain any of these forms.
 *
 * @param string $string Input string
 *
 * @return string The input with all non-breaking spaces turned into ' '
 */
protected function stripNonBreakingSpaces ($string)
{
    $nonBreakingSpaces = array(
        '&nbsp;',
        '&#160;',
        '&#xA0;',
        '&#xa0;',
        "\xC2\xA0", // UTF-8 byte sequence of U+00A0
    );

    return str_replace($nonBreakingSpaces, ' ', $string);
}
}