DOM中的str_replace()

时间:2014-04-25 12:20:17

标签: php html dom

我当前的代码

 $this->data = $this->result->RetrieveDocumentResult;

        $this->dom = new DOMDocument();
        $this->dom->strictErrorChecking = false;
        $this->dom->formatOutput = true;
        $this->dom->loadHTML(base64_decode($this->data));

        $exceptions = array(
            'a'   => array('href'),
            'img' => array('src')
        );

        $this->stripAttributes($exceptions);
        $this->stripSpanTags();


        file_put_contents('Recode/' . $flname . '.html', base64_decode($this->data));
    }

    /**
     * Remove every attribute in the document that is not explicitly allowed.
     *
     * Visits all elements of $this->dom, exposes the tag and attribute names
     * being inspected through $this->tag and $this->attribute, and lets
     * checkAttrExceptions() decide whether that pair is kept. The resulting
     * markup is stored base64-encoded in $this->data.
     *
     * @param array $exceptions Map of tag name => list of attribute names to keep
     *
     * @return void
     */
    public function stripAttributes(array $exceptions)
    {
        $finder  = new DOMXPath($this->dom);
        $matches = $finder->query("//*");

        if ($matches === false) {
            die('Xpath error!');
        }

        /** @var $node DOMElement */
        foreach ($matches as $node) {
            // Walk backwards so a removal never shifts an index still to be visited.
            $index = $node->attributes->length;

            while (--$index >= 0) {
                $this->tag       = $node->nodeName;
                $this->attribute = $node->attributes->item($index)->nodeName;

                if (!$this->checkAttrExceptions($exceptions)) {
                    $node->removeAttribute($this->attribute);
                }
            }
        }

        $this->data = base64_encode($this->dom->saveHTML());
    }

    /**
     * Tell whether the tag/attribute pair currently held in $this->tag and
     * $this->attribute is listed in the given exceptions.
     *
     * Terminates the script when an entry examined along the way has an
     * empty or non-array attribute list.
     *
     * @param array $exceptions Map of tag name => list of attribute names
     *
     * @return bool True when the pair is listed, false otherwise
     */
    public function checkAttrExceptions(array $exceptions)
    {
        foreach ($exceptions as $tagName => $allowed) {
            if (!is_array($allowed) || empty($allowed)) {
                die('Attributes not set!');
            }

            // Only the entry for the current tag can produce a match.
            if ($tagName !== $this->tag) {
                continue;
            }

            if (in_array($this->attribute, $allowed, true)) {
                return true;
            }
        }

        return false;
    }


    /**
     * Unwrap every SPAN element of the current DOM document.
     *
     * Each span is replaced by its own child nodes, then the resulting
     * markup is stored base64-encoded in $this->data.
     *
     * @return void
     */
    protected function stripSpanTags ()
    {
        $spans = $this->dom->getElementsByTagName('span');

        // The loop relies on the node list being live: a replaced span drops
        // out of it, so index 0 always yields the next remaining span.
        for ($current = $spans->item(0); $current; $current = $spans->item(0)) {
            $contents = $this->dom->createDocumentFragment();

            // appendChild() moves the node, so firstChild advances by itself.
            while ($child = $current->firstChild) {
                $contents->appendChild($child);
            }

            $current->parentNode->replaceChild($contents, $current);
        }

        $this->data = base64_encode($this->dom->saveHTML());
    }


}

想要删除HTML中的所有 &nbsp;,请执行以下操作:

$html = str_replace('&nbsp;', '', $html);

但我不清楚应该如何以及在何处把它添加到第一组代码中。请帮帮我。

此外,这不应该覆盖第一组代码中已有的标签过滤器。

1 个答案:

答案 0 :(得分:0)

找到了解决方案,以下代码可以使用:

$this->result = $this->soap->RetrieveDocument(
            array('format' => 'html')
        );

        $this->data = $this->result->RetrieveDocumentResult;

        $this->dom = new DOMDocument();
        $this->dom->strictErrorChecking = false;
        $this->dom->formatOutput = true;
        $this->dom->loadHTML(base64_decode($this->data));

        $exceptions = array(
            'a'   => array('href'),
            'img' => array('src')
        );

        $this->stripAttributes($exceptions);
        $this->stripSpanTags();

        $decoded = base64_decode($this->data);
        $decoded = $this->stripNonBreakingSpaces($decoded);

        file_put_contents('Recode/' . $flname . '.html', $decoded);
    }

    /**
     * Strip all attributes from the document except the whitelisted ones.
     *
     * For each element of $this->dom the attributes are examined one by one;
     * the names under inspection are published in $this->tag and
     * $this->attribute so that checkAttrExceptions() can judge the pair.
     * Afterwards the serialized document is stored base64-encoded in
     * $this->data.
     *
     * @param array $exceptions Map of tag name => list of attribute names to keep
     *
     * @return void
     */
    public function stripAttributes(array $exceptions)
    {
        $finder  = new DOMXPath($this->dom);
        $matches = $finder->query("//*");

        if ($matches === false) {
            die('Xpath error!');
        }

        /** @var $node DOMElement */
        foreach ($matches as $node) {
            // Iterate from the last attribute down so removals do not disturb
            // the positions that are still pending.
            $index = $node->attributes->length;

            while (--$index >= 0) {
                $this->tag       = $node->nodeName;
                $this->attribute = $node->attributes->item($index)->nodeName;

                if (!$this->checkAttrExceptions($exceptions)) {
                    $node->removeAttribute($this->attribute);
                }
            }
        }

        $this->data = base64_encode($this->dom->saveHTML());
    }

    /**
     * Check the pair stored in $this->tag / $this->attribute against the
     * given exceptions.
     *
     * The script is terminated when an entry reached during the scan has an
     * empty or non-array attribute list.
     *
     * @param array $exceptions Map of tag name => list of attribute names
     *
     * @return bool True when the pair is listed, false otherwise
     */
    public function checkAttrExceptions(array $exceptions)
    {
        foreach ($exceptions as $tagName => $allowed) {
            if (!is_array($allowed) || empty($allowed)) {
                die('Attributes not set!');
            }

            // Entries for other tags cannot match; move on to the next one.
            if ($tagName !== $this->tag) {
                continue;
            }

            if (in_array($this->attribute, $allowed, true)) {
                return true;
            }
        }

        return false;
    }


    /**
     * Remove all SPAN wrappers from the current DOM document while keeping
     * their contents in place.
     *
     * The serialized result is stored base64-encoded in $this->data.
     *
     * @return void
     */
    protected function stripSpanTags ()
    {
        $spans = $this->dom->getElementsByTagName('span');

        // The loop depends on the list shrinking as spans are replaced, so
        // the first item is always the next span still to be unwrapped.
        for ($current = $spans->item(0); $current; $current = $spans->item(0)) {
            $contents = $this->dom->createDocumentFragment();

            // Moving the first child into the fragment exposes the next one.
            while ($child = $current->firstChild) {
                $contents->appendChild($child);
            }

            $current->parentNode->replaceChild($contents, $current);
        }

        $this->data = base64_encode($this->dom->saveHTML());
    }


    /**
     * Replace all non-breaking spaces within a string with a regular space
     *
     * Covers the "&nbsp;" named entity, the "&#160;" numeric entity and the
     * raw UTF-8 encoded U+00A0 character, so the result is the same whether
     * or not the serializer escaped the character.
     *
     * @param string $string Input string
     *
     * @return string
     */
    protected function stripNonBreakingSpaces ($string)
    {
        // "\xC2\xA0" is the UTF-8 byte sequence of U+00A0 (NO-BREAK SPACE).
        $nonBreakingSpaces = array('&nbsp;', '&#160;', "\xC2\xA0");

        return str_replace($nonBreakingSpaces, ' ', $string);
    }


}