WP HTML 精简(minify)类无法正常工作

时间:2013-03-07 23:42:01

标签: php regex wordpress minify

我在http://www.intert3chmedia.net/2011/12/minify-html-javascript-css-without.html找到了这个脚本。它的作用是精简(minify)HTML,而不是像类名暗示的那样压缩(compress)HTML:

<?php
/**
 * Minifies an HTML document: collapses whitespace in tags, text, inline
 * scripts and inline styles, and strips ordinary HTML comments while keeping
 * MSIE conditional comments.
 *
 * Content inside <pre>/<textarea>, and content between a pair of
 * "<!--wp-html-compression no compression-->" markers, is left untouched.
 * Cast the object to string to obtain the minified document.
 */
class WP_HTML_Compression
{
    // Settings
    protected $compress_css = true;     // collapse whitespace inside <style> blocks
    protected $compress_js = true;      // collapse whitespace inside <script> blocks
    protected $info_comment = true;     // append a "size saved" comment to the output
    protected $remove_comments = true;  // strip non-conditional HTML comments

    // Variables
    // Minified document; stays an empty string when there was nothing to process
    // so that __toString() always has a string to return.
    protected $html = '';

    /**
     * @param string $html Raw HTML to minify. An empty value leaves the object
     *                     holding an empty string.
     */
    public function __construct($html)
    {
        $html = (string) $html;

        // Compare against '' rather than using empty(): the string "0" is real content.
        if ($html !== '')
        {
            $this->parseHTML($html);
        }
    }

    /**
     * @return string The minified document (empty string if nothing was parsed).
     */
    public function __toString()
    {
        return (string) $this->html;
    }

    /**
     * Build the trailing statistics comment.
     *
     * @param string $raw        Original document.
     * @param string $compressed Minified document.
     * @return string HTML comment reporting the byte sizes and the percentage saved.
     */
    protected function bottomComment($raw, $compressed)
    {
        $raw = strlen($raw);
        $compressed = strlen($compressed);

        // An empty original would divide by zero; report 0% instead.
        $savings = ($raw > 0) ? round(($raw - $compressed) / $raw * 100, 2) : 0;

        return '<!--HTML compressed, size saved '.$savings.'%. From '.$raw.' bytes, now '.$compressed.' bytes-->';
    }

    /**
     * Tokenize the document and rebuild it with whitespace and comments removed.
     *
     * @param string $html Raw HTML.
     * @return string Minified HTML, or the input unchanged when tokenizing fails.
     */
    protected function minifyHTML($html)
    {
        // Alternatives, in order: whole <script> block, whole <style> block,
        // HTML comment, any other tag, run of text.
        $pattern = '/<(?<script>script).*?<\/script\s*>|<(?<style>style).*?<\/style\s*>|<!(?<comment>--).*?-->|<(?<tag>[\/\w.:-]*)(?:".*?"|\'.*?\'|[^\'">]+)*>|(?<text>((<[^!\/\w.:-])?[^<]*)+)|/si';

        if (preg_match_all($pattern, $html, $matches, PREG_SET_ORDER) === false)
        {
            // PCRE gave up (for example a backtrack limit was hit). Serving the
            // page unminified is better than serving a blank or truncated one.
            return $html;
        }

        $overriding = false; // inside a "no compression" marker pair
        $raw_tag = false;    // name of the open <pre>/<textarea>, if any
        // Initialised so the first token never reads an undefined variable.
        // Comment and <pre>/<textarea> tokens do not assign it and therefore
        // reuse the decision made for the previous token.
        $strip = false;
        $output = '';

        foreach ($matches as $token)
        {
            $tag = isset($token['tag']) ? strtolower($token['tag']) : null;
            $content = $token[0];

            if ($tag === null)
            {
                // No 'tag' entry: script block, style block or comment.
                if (!empty($token['script']))
                {
                    // The "no compression" markers protect scripts as well.
                    $strip = $this->compress_js && !$overriding;
                }
                elseif (!empty($token['style']))
                {
                    $strip = $this->compress_css && !$overriding;
                }
                elseif ($content === '<!--wp-html-compression no compression-->')
                {
                    $overriding = !$overriding;

                    // Don't print the comment
                    continue;
                }
                elseif ($this->remove_comments)
                {
                    if (!$overriding && $raw_tag !== 'textarea')
                    {
                        // Remove any HTML comments, except MSIE conditional comments
                        $content = preg_replace('/<!--(?!\s*(?:\[if [^\]]+]|<!|>))(?:(?!-->).)*-->/s', '', $content);
                    }
                }
            }
            elseif ($tag === 'pre' || $tag === 'textarea')
            {
                $raw_tag = $tag;
            }
            elseif ($tag === '/pre' || $tag === '/textarea')
            {
                $raw_tag = false;
            }
            elseif ($raw_tag || $overriding)
            {
                $strip = false;
            }
            else
            {
                $strip = true;

                // Remove any empty attributes, except:
                // action, alt, content, src
                $content = preg_replace('/(\s+)(\w++(?<!\baction|\balt|\bcontent|\bsrc)="")/', '$1', $content);

                // Remove any space before the end of self-closing XHTML tags
                // JavaScript excluded
                $content = str_replace(' />', '/>', $content);
            }

            if ($strip)
            {
                $content = $this->removeWhiteSpace($content);
            }

            $output .= $content;
        }

        return $output;
    }

    /**
     * Minify $html and store the result, optionally followed by the statistics comment.
     *
     * @param string $html Raw HTML.
     */
    public function parseHTML($html)
    {
        $html = (string) $html;
        $this->html = $this->minifyHTML($html);

        if ($this->info_comment)
        {
            $this->html .= "\n" . $this->bottomComment($html, $this->html);
        }
    }

    /**
     * Turn tabs into spaces, drop line breaks and collapse runs of spaces.
     *
     * NOTE(review): because line breaks are removed outright, a "//" line
     * comment inside an inline script ends up on the same line as the code
     * that followed it. Keep $compress_js off for pages that rely on such
     * comments, or wrap them in the "no compression" markers.
     *
     * @param string $str
     * @return string
     */
    protected function removeWhiteSpace($str)
    {
        $str = str_replace(array("\t", "\n", "\r"), array(' ', '', ''), $str);

        // Each pass halves every run of spaces; loop until no pair is left.
        while (strpos($str, '  ') !== false)
        {
            $str = str_replace('  ', ' ', $str);
        }

        return $str;
    }
}

/**
 * Output-buffer callback: minify the buffered page.
 *
 * Returns a plain string, which is what an ob_start() callback is documented
 * to return, rather than the minifier object itself.
 *
 * @param string $html Buffered output.
 * @return string Minified output, or the buffer unchanged when it is empty.
 */
function wp_html_compression_finish($html)
{
    // Nothing to minify; also avoids building a minifier for an empty buffer.
    if (empty($html))
    {
        return (string) $html;
    }

    return (string) new WP_HTML_Compression($html);
}

/**
 * Open an output buffer whose contents are handed to
 * wp_html_compression_finish() when the buffer is flushed.
 */
function wp_html_compression_start()
{
    $callback = 'wp_html_compression_finish';

    ob_start($callback);
}

// Register the buffer start on the 'get_header' hook.
add_action('get_header', 'wp_html_compression_start');
?>

我的HTML在顶部有IE条件,如下所示:

<!--[if lt IE 7 ]><html class="ie ie6" lang="en"><![endif]-->
<!--[if IE 7 ]><html class="ie ie7" lang="en"><![endif]-->
<!--[if IE 8 ]><html class="ie ie8" lang="en"><![endif]-->
<!--[if (gte IE 9)|!(IE)]><!--><html lang="en"><!--<![endif]-->

IE 条件注释之后的所有内容都被正确精简了,结果是一长行 HTML。问题是 IE 条件注释之前的所有内容似乎都没有被精简,换行符仍然保留。有人知道哪里出了问题吗?

3 个答案:

答案 0 :(得分:3)

自定义方法怎么样?

/**
 * Collapse whitespace and strip comments from an HTML buffer, leaving
 * <pre>…</pre> blocks untouched.
 *
 * @param string $buffer Raw HTML.
 * @return string Compressed HTML. If the buffer cannot be split, it is
 *                returned unchanged; a chunk that cannot be processed is
 *                kept as it was.
 */
public static function html_compress($buffer)
{
    // Split so that every <pre>…</pre> block becomes its own chunk. The match
    // is case-insensitive so that <PRE> blocks are protected too.
    $chunks = preg_split( '/(<pre.*?\/pre>)/is', $buffer, -1, PREG_SPLIT_DELIM_CAPTURE );
    if ( $chunks === false )
    {
        // PCRE failure: return the page as-is rather than an empty string.
        return $buffer;
    }

    $replace = array(
        '#[\n\r\t\s]+#'           => ' ',  // remove new lines & tabs
        '#>\s{2,}<#'              => '><', // remove inter-tag whitespace
        '#\/\*.*?\*\/#i'          => '',   // remove CSS & JS comments
        '#<!--(?![\[>]).*?-->#si' => '',   // strip comments, but leave IF IE (<!--[...]) and "<!-->"
        '#\s+<(html|head|meta|style|/style|title|script|/script|/body|/html|/ul|/ol|li)#' => '<$1', // whitespace before these elements is not needed
        '#\s+(/?)>#' => '$1>', // just before the closing of " >"|" />"
        '#class="\s+#'=> 'class="', // at times, there is whitespace before class=" className"
        '#(script|style)>\s+#' => '$1>', // whitespace right after a <script>/<style> tag
    );
    $search = array_keys($replace);

    $result = '';
    foreach ( $chunks as $c )
    {
        // Chunks that start with "<pre" are passed through verbatim.
        if ( stripos( $c, '<pre' ) !== 0 )
        {
            $compressed = preg_replace($search, $replace, $c);

            // preg_replace() returns null on error; keep the original chunk
            // instead of silently dropping that part of the page.
            if ( $compressed !== null )
            {
                $c = $compressed;
            }
        }
        $result .= $c;
    }
    return $result;
}

GitHub repository

答案 1 :(得分:2)

该网站只是照搬了代码并给它改了个名字。难道用“他们的”代码反而比用原版插件更好?好吧。

有一个适用于WordPress和标准PHP的新版本: http://www.svachon.com/blog/html-minify/

答案 2 :(得分:1)

这里一定还有别的原因。

我把这个“类”简化成了一个单文件 PHP 脚本:把所有 $this-> 引用替换为 true(对于成员变量)或普通函数调用。

然后我用很长的 HTML 输入做了各种测试,也试过各种条件注释,包括直接复制粘贴你上面给出的条件 <html> 标签。每次的结果都是单行输出。

总之,有一些可能性:

  1. HTML中还有其他内容将其丢弃(链接到实际页面?)
  2. 精简发生在额外的 HTML 被添加进来之前(很有可能)
  3. 某个成员变量的值不是 true(不太可能)
  4. 正在将<!--wp-html-compression no compression-->插入文档的该部分
  5. 为什么不在 minifyHTML($html) 方法的开头插入 error_log($html);(或类似语句)来看看原始 HTML 是什么样子?我猜结果会告诉你它为什么不起作用。(我想你也可以直接 return $html;。)

    EDIT --- 这是我的“脚本”。请注意,条件注释被保留,而非条件的 HTML 注释被删除。我完全没有整理代码的排版,因为它只是从你提供的原始类里临时拼凑出来的。

    <?php
    // Sample page used as the minifier's input: a doctype, MSIE conditional
    // comments wrapping the <html> tag, inline scripts and styles (some with
    // CDATA markers), ordinary HTML comments and a <noscript> block.
    $html = <<<EOF
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    <!--[if lt IE 7 ]><html class="ie ie6" lang="en"><![endif]-->
    <!--[if IE 7 ]><html class="ie ie7" lang="en"><![endif]-->
    <!--[if IE 8 ]><html class="ie ie8" lang="en"><![endif]-->
    <!--[if (gte IE 9)|!(IE)]><!--><html lang="en"><!--<![endif]-->
    <head>
    <meta http-equiv="X-UA-Compatible" content="IE=8">
    
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>Page Title</title>
    <meta name="description" content="desc" />
    <meta name="keywords" content="c" />
    <meta name="robots" content="INDEX,FOLLOW" />
    <link rel="icon" href="http://www.stackoverflow.com/" type="image/x-icon" />
    <link rel="shortcut icon" href="http://www.stackoverflow.com/" type="image/x-icon" />
    <!--[if lt IE 7]>
    <script type="text/javascript">
    //<![CDATA[
        var BLANK_URL = '';
    //]]>
    </script>
    <![endif]-->
    <!--[if lt IE 8]>
    <style type="text/css">
        .single-call-to-action .action-text {
            filter:progid:DXImageTransform.Microsoft.gradient(startColorstr=#66000000, endColorstr=#66000000);
        }
        #PaypalSideBar a img {
            left: 0 !important;
            top: 0 !important;
            width: 182px !important;
            height: 96px !important;
        }
    </style>
    <![endif]-->
    <!--[if lt IE 9]>
    <style type="text/css">
        .single-call-to-action .action-text {
            -ms-filter: "progid:DXImageTransform.Microsoft.gradient(startColorstr=#66000000, endColorstr=#66000000)";
        }
    </style>
    <![endif]-->
    <script type="text/javascript" src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
    <script type="text/javascript">jQuery.noConflict();</script>
    
    <link href="//fonts.googleapis.com/css?family=Marck+Script" rel="stylesheet" type="text/css">
    
    <link rel="stylesheet" type="text/css" href="http://www.stackoverflow.com/test.css" media="all" />
    
    <!--[if lt IE 8]><link rel="stylesheet" type="text/css" href="http://www.stackoverflow.com/test.css" media="all" /><![endif]-->
    <!--[if lt IE 7]><script type="text/javascript" src="http://www.stackoverflow.com/test.js"></script><script type="text/javascript" src="http://www.stackoverflow.com/test.js"></script><![endif]-->
    
    
    <script type="text/javascript">
    //<![CDATA[
    Mage.Cookies.path     = '/';
    //]]>
    </script>
    
    <script type="text/javascript">
    //<![CDATA[
    optionalZipCountries = ["US"];
    //]]>
    </script>
    <!-- BEGIN GOOGLE ANALYTICS CODEs -->
    <script type="text/javascript">
    //<![CDATA[
        var _gaq = _gaq || [];
    
    _gaq.push(['_setAccount', 'UA-x']);
    _gaq.push(['_setDomainName', 'http://www.stackoverflow.com/']);
    _gaq.push(['_trackPageview']);
    
        (function() {
            var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
            ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
            var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
        })();
    
    //]]>
    </script>
    <!-- END GOOGLE ANALYTICS CODE -->
    <script type="text/javascript">
        //<![CDATA[
        window.HDUSeed = 'x';
        window.HDUSeedIntId = setInterval(function() {
            if (document.observe) {
                document.observe('dom:loaded', function() {
                    for (var i = 0; i < document.forms.length; i++) {
                        if (document.forms[i].getAttribute('action') && document.forms[i].getAttribute('action').match('contacts/index/post')) {
                            var el = document.createElement('input');
                            el.type = ('hidden');
                            el.name = 'hdu_seed';
                            el.value = window.HDUSeed;
                            document.forms[i].appendChild(el);
    
                        }
                    }
                });
                clearInterval(window.HDUSeedIntId)
            }
        }, 100)
        //]]>
    </script>
    <script type="text/javascript">//<![CDATA[
            var Translator = new Translate([]);
            //]]></script><meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
    
       <style type="text/css">
       /*default iteaser*/
       #TT2ILTbox {
         background-color: #f3f3f3;
         border: 1px solid #d2d2d2;
         padding: 10px;
         text-align: center;
         width: 280px;
         margin-bottom: 10px;
         margin-top: 10px;
       }
    
       #TT2ILTbox h2 {
         font-size: 12px;
         font-weight: bold;
         margin: 5px 0 5px 0;
       }
    
       #TT2ILTcount-line * {
         font-size: 11px;
       }
    
       #TT2ILTcount-line strong {
         font: bold 11px Arial;
       }
    
       #TT2ILTcount-line p {
         margin: 5px 0 5px 0;
       }
    
       #TT2ILTbutton-holder {
         display: -moz-box !important;
         display: block;
         height: 31px;
         text-align: center;
       }
    
       a.TT2ILTbutton, a.TT2ILTbutton span {
         background-color: #fa002f;
       }
    
       .TT2ILTbutton * {
         font: bold 12px Arial;
    
       }
    
       a.TT2ILTbutton {
         background-image: url('http://static.www.turnto.com/tra2/images/iteaser/1/button-right.png');
         background-repeat: no-repeat;
         background-position: top right;
         display: block;
         height: 31px;
         margin-right: 6px;
         padding-right: 16px;
         text-decoration: none;
         color: white;
       }
    
       a.TT2ILTbutton span {
         background-image: url('http://static.www.turnto.com/tra2/images/iteaser/1/button-left.png');
         background-repeat: no-repeat;
         display: block;
         line-height: 22px;
         padding: 2px 0 7px 18px;
       }
    
       a.TurnToIteaSee {
         font-size: 11px;
         text-decoration: none;
         color: #000;
         cursor: pointer;
       }
       </style>
    
    
    </head>
    <body class=" cms-index-index cms-home">
    <div class="wrapper">
            <noscript>
            <div class="global-site-notice noscript">
                <div class="notice-inner">
                    <p>
                        <strong>JavaScript seems to be disabled in your browser.</strong><br />
                        You must have JavaScript enabled in your browser to utilize the functionality of this website.                </p>
                </div>
            </div>
        </noscript>
        <div class="page">
        </div>
    </div>
    </body>
    </html>
    
    
    EOF;
    
    /**
     * Flatten whitespace in a fragment: tabs become spaces, line breaks are
     * dropped, and every run of spaces shrinks to a single space.
     *
     * @param string $str Fragment to flatten.
     * @return string The flattened fragment.
     */
    function removeWhiteSpace($str)
    {
        // Applied in order: tab -> space, then "\n" and "\r" removed.
        $flat = str_replace(array("\t", "\n", "\r"), array(' ', '', ''), $str);

        // Every pass replaces each pair of spaces with one; repeat until no pair is left.
        while (strpos($flat, '  ') !== false)
        {
            $flat = str_replace('  ', ' ', $flat);
        }

        return $flat;
    }
    
    
    // Stand-alone copy of the minifier loop: tokenize $html, rebuild it with
    // whitespace and ordinary comments removed, then echo the result.
    // Alternatives of the pattern, in order: whole <script> block, whole
    // <style> block, HTML comment, any other tag, run of text.
    $pattern = '/<(?<script>script).*?<\/script\s*>|<(?<style>style).*?<\/style\s*>|<!(?<comment>--).*?-->|<(?<tag>[\/\w.:-]*)(?:".*?"|\'.*?\'|[^\'">]+)*>|(?<text>((<[^!\/\w.:-])?[^<]*)+)|/si';
            preg_match_all($pattern, $html, $matches, PREG_SET_ORDER);
            // True while between a pair of "no compression" marker comments
            $overriding = false;
            // Name of the currently open <pre>/<textarea>, or false when outside one
            $raw_tag = false;
            // Variable reused for output
            $html = '';
            // NOTE: $strip is not reset per token. Branches below that do not
            // assign it (comments, <pre>/<textarea> tags) reuse the value left
            // by the previous token.
            foreach ($matches as $token)
            {
                // $tag is null when the match has no 'tag' entry; that branch
                // handles script, style and comment tokens.
                $tag = (isset($token['tag'])) ? strtolower($token['tag']) : null;
    
                $content = $token[0];
    
                if (is_null($tag))
                {
                    if ( !empty($token['script']) )
                    {
                        // Hard-coded true in place of the class's compress_js setting
                        $strip = true;
                    }
                    else if ( !empty($token['style']) )
                    {
                        // Hard-coded true in place of the class's compress_css setting
                        $strip = true;
                    }
                    else if ($content == '<!--wp-html-compression no compression-->')
                    {
                        // Marker comment toggles the override on and off
                        $overriding = !$overriding;
    
                        // Don't print the comment
                        continue;
                    }
                    else if (true)
                    {
                        // Hard-coded true in place of the class's remove_comments setting
                        if (!$overriding && $raw_tag != 'textarea')
                        {
                            // Remove any HTML comments, except MSIE conditional comments
                            $content = preg_replace('/<!--(?!\s*(?:\[if [^\]]+]|<!|>))(?:(?!-->).)*-->/s', '', $content);
                        }
                    }
                }
                else
                {
                    if ($tag == 'pre' || $tag == 'textarea')
                    {
                        // Entering a raw block
                        $raw_tag = $tag;
                    }
                    else if ($tag == '/pre' || $tag == '/textarea')
                    {
                        // Leaving the raw block
                        $raw_tag = false;
                    }
                    else
                    {
                        if ($raw_tag || $overriding)
                        {
                            // Inside a raw block or an override region: leave as-is
                            $strip = false;
                        }
                        else
                        {
                            $strip = true;
    
                            // Remove any empty attributes, except:
                            // action, alt, content, src
                            $content = preg_replace('/(\s+)(\w++(?<!\baction|\balt|\bcontent|\bsrc)="")/', '$1', $content);
    
                            // Remove any space before the end of self-closing XHTML tags
                            // JavaScript excluded
                            $content = str_replace(' />', '/>', $content);
                        }
                    }
                }
    
                if ($strip)
                {
                    $content = removeWhiteSpace($content);
                }
    
                $html .= $content;
            }
    
            // Print the minified document
            echo $html;