自动删除 HTML 中的缩进和不必要的空白(压缩)

时间:2010-11-30 12:14:06

标签: asp.net asp.net-mvc

我有一个asp.net-mvc应用程序,当我查看页面的源代码时,我看到html是缩进的,有很多空格,

我想如果我删除所有这些空格,我的页面将会变小(KB)

有人知道如何自动删除它们吗?

3 个答案:

答案 0 :(得分:2)

取自http://madskristensen.net/post/A-whitespace-removal-HTTP-module-for-ASPNET-20.aspx

/// <summary>
/// Action filter that wraps the current response filter in a <see cref="WhiteSpaceStream"/>.
/// </summary>
[AttributeUsage(AttributeTargets.Class, Inherited = true, AllowMultiple = false)]
internal class WhiteSpaceFilterAttribute : ActionFilterAttribute
{
    /// <summary>
    /// Replaces the response filter with a <see cref="WhiteSpaceStream"/> before the action runs.
    /// </summary>
    /// <param name="filterContext">Context of the action about to execute; its HTTP response receives the new filter.</param>
    public override void OnActionExecuting(ActionExecutingContext filterContext)
    {
        var response = filterContext.HttpContext.Response;

        // The existing filter is handed to the new stream as its sink rather than being discarded.
        response.Filter = new WhiteSpaceStream(response.Filter);
    }
}

/// <summary>
/// Stream wrapper that removes the whitespace matched by a fixed regular expression
/// from text written to it and forwards the result to the wrapped sink stream.
/// Read, seek, length, flush and close operations are delegated to the sink.
/// </summary>
internal class WhiteSpaceStream : Stream
{
    // Destination stream that receives the filtered output.
    private readonly Stream m_sink;

    // Every match of this pattern is replaced with the empty string in Write.
    private static readonly Regex m_regex = new Regex(@"(?<=[^])\t{2,}|(?<=[>])\s{2,}(?=[<])|(?<=[>])\s{2,11}(?=[<])|(?=[\n])\s{2,}");
    //private static Regex m_regex = new Regex(@"^\s+", RegexOptions.Multiline | RegexOptions.Compiled); 

    /// <summary>
    /// Creates a filter that writes its output to <paramref name="sink"/>.
    /// </summary>
    /// <param name="sink">Stream that receives the filtered bytes.</param>
    public WhiteSpaceStream(Stream sink)
    {
        m_sink = sink;
    }

    /// <summary>Always reports <c>true</c>.</summary>
    public override bool CanRead
    {
        get { return true; }
    }

    /// <summary>Always reports <c>true</c>.</summary>
    public override bool CanSeek
    {
        get { return true; }
    }

    /// <summary>Always reports <c>true</c>.</summary>
    public override bool CanWrite
    {
        get { return true; }
    }

    /// <summary>Flushes the sink stream.</summary>
    public override void Flush()
    {
        m_sink.Flush();
    }

    /// <summary>Always reports <c>0</c>; the length of the filtered output is not tracked.</summary>
    public override long Length
    {
        get { return 0; }
    }

    // Backing store for Position; nothing else in this class reads or updates it.
    private long _position;

    /// <summary>Gets or sets a locally stored position value (not forwarded to the sink).</summary>
    public override long Position
    {
        get { return _position; }
        set { _position = value; }
    }

    /// <summary>Reads directly from the sink stream.</summary>
    /// <param name="buffer">Destination buffer.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start storing bytes.</param>
    /// <param name="count">Maximum number of bytes to read.</param>
    /// <returns>The value returned by the sink's <c>Read</c>.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        return m_sink.Read(buffer, offset, count);
    }

    /// <summary>Seeks within the sink stream.</summary>
    /// <param name="offset">Offset relative to <paramref name="origin"/>.</param>
    /// <param name="origin">Reference point for the seek.</param>
    /// <returns>The value returned by the sink's <c>Seek</c>.</returns>
    public override long Seek(long offset, SeekOrigin origin)
    {
        return m_sink.Seek(offset, origin);
    }

    /// <summary>Sets the length of the sink stream.</summary>
    /// <param name="value">New length passed to the sink.</param>
    public override void SetLength(long value)
    {
        m_sink.SetLength(value);
    }

    /// <summary>Closes the sink stream.</summary>
    public override void Close()
    {
        m_sink.Close();
    }

    /// <summary>
    /// Decodes the requested slice of <paramref name="buffer"/>, removes every regex match,
    /// and writes the re-encoded text to the sink.
    /// </summary>
    /// <param name="buffer">Buffer containing the bytes to filter.</param>
    /// <param name="offset">Index of the first byte to process.</param>
    /// <param name="count">Number of bytes to process.</param>
    public override void Write(byte[] buffer, int offset, int count)
    {
        // Decode only [offset, offset + count). Decoding the whole buffer would emit
        // stale bytes that lie outside the slice the caller asked us to write.
        // NOTE(review): Encoding.Default may differ from the response's actual encoding,
        // and a multi-byte character split across two Write calls is decoded separately — confirm.
        string text = Encoding.Default.GetString(buffer, offset, count);

        text = m_regex.Replace(text, string.Empty);

        byte[] outdata = Encoding.Default.GetBytes(text);
        m_sink.Write(outdata, 0, outdata.Length);
    }
}

答案 1 :(得分:2)

我想推荐以下代码。它工作得很好(我在几个网站上使用它),它比@ David的版本更简单:

using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Mvc;

/// <summary>
/// Action filter that installs a <see cref="WhitespaceFilter"/> as the response filter.
/// </summary>
public class WhitespaceStrip : ActionFilterAttribute
{
    /// <summary>
    /// Sets the response filter to a new <see cref="WhitespaceFilter"/> before the action runs.
    /// </summary>
    /// <param name="Context">Context of the action about to execute.</param>
    public override void OnActionExecuting(ActionExecutingContext Context)
    {
        try
        {
            var response = Context.HttpContext.Response;
            response.Filter = new WhitespaceFilter();
        }
        catch (Exception)
        {
            // Best effort: any failure while installing the filter is ignored,
            // which leaves the response filter as it was.
        }
    }
}

/// <summary>
/// Response filter that strips whitespace and HTML comments from <c>text/html</c> output
/// and forwards every other content type to the original filter unchanged.
/// </summary>
public class WhitespaceFilter : MemoryStream
{
    // Group 1 captures a whole <pre>...</pre> element; replacing with "$1" keeps that element
    // verbatim while the other alternatives (whitespace runs, tabs, line breaks) are removed.
    // Built once instead of on every Write call.
    private static readonly Regex WhitespaceRegex = new Regex("(<pre>[^<>]*(((?<Open><)[^<>]*)+((?<Close-Open>>)[^<>]*)+)*(?(Open)(?!))</pre>)|\\s\\s+|[\\t\\n\\r]", RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches HTML comments, including ones that span multiple lines.
    private static readonly Regex CommentsRegex = new Regex("<!--.*?-->", RegexOptions.Compiled | RegexOptions.Singleline);

    // Response of the request that is current when this filter is constructed.
    private HttpResponse Response = HttpContext.Current.Response;

    // The filter that was installed on the response before this one; receives our output.
    private Stream Filter = null;

    // Content types whose bodies are rewritten; everything else passes through untouched.
    private string[] ContentTypes = new string[1] {
        "text/html"
    };

    /// <summary>
    /// Captures the response's current filter so output can be forwarded to it.
    /// </summary>
    public WhitespaceFilter()
    {
        this.Filter = this.Response.Filter;
    }

    /// <summary>
    /// Filters the requested slice of <paramref name="Buffer"/> and writes the result to the
    /// previously installed response filter.
    /// </summary>
    /// <param name="Buffer">Buffer containing the bytes to write.</param>
    /// <param name="Offset">Index of the first byte to process.</param>
    /// <param name="Count">Number of bytes to process.</param>
    public override void Write(
        byte[] Buffer,
        int Offset,
        int Count)
    {
        if (!this.ContentTypes.Contains(this.Response.ContentType))
        {
            // Forward the raw bytes: decoding and re-encoding non-HTML (possibly binary)
            // content as UTF-8 would corrupt it.
            this.Filter.Write(Buffer, Offset, Count);
            return;
        }

        this.Response.ContentEncoding = Encoding.UTF8;

        // Decode only the slice the caller asked for, not the whole buffer.
        // NOTE(review): a multi-byte character split across two Write calls is decoded
        // separately in each call — confirm this is acceptable for the expected output.
        string html = Encoding.UTF8.GetString(Buffer, Offset, Count);

        html = WhitespaceRegex.Replace(html, "$1");
        html = CommentsRegex.Replace(html, string.Empty);

        // The encoded output is a fresh array, so it is written from index 0;
        // reusing the caller's Offset here would run past the end of the array.
        byte[] output = Encoding.UTF8.GetBytes(html);
        this.Filter.Write(output, 0, output.Length);
    }
}

**更新**

@Omu,你说它慢“6 倍”这句话让我有些在意,于是我开始验证你说得是否正确。最后我重写了过滤器并稍作清理,然后做了一些测试:循环输出一个表格 10,000 次以产生大量空白,观察过滤器的表现。做完这些之后,我发现两个正则表达式之间没有区别。

现在,如果你暗示表达方式的工作方式不同而且我的速度会慢一点,那可能会有一些道理,但是为了让你看到任何差异,你必须推出超过1 MB大小的 HTML 页面......我希望这不是你正在做的事情。

此外,我的表达式在<pre>元素中保留了空格......

所有这些都说,这是我的修订版本:

using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Mvc;

/// <summary>
/// Action filter that installs a <see cref="WhitespaceStream"/> as the response filter.
/// </summary>
[AttributeUsage(AttributeTargets.Class, Inherited = true, AllowMultiple = false)]
internal class WhitespaceStripAttribute : ActionFilterAttribute
{
    /// <summary>
    /// Sets the response filter to a new <see cref="WhitespaceStream"/> after the action has run.
    /// </summary>
    /// <param name="ActionExecutedContext">Context of the action that has just executed.</param>
    public override void OnActionExecuted(ActionExecutedContext ActionExecutedContext)
    {
        var httpContext = ActionExecutedContext.HttpContext;
        var response = httpContext.Response;

        response.Filter = new WhitespaceStream(httpContext);
    }
}

/// <summary>
/// Response filter that strips whitespace and HTML comments from <c>text/html</c> output
/// and forwards every other content type to the original filter unchanged.
/// </summary>
internal class WhitespaceStream : MemoryStream
{
    // HTTP context whose response is being filtered.
    private readonly HttpContextBase HttpContext = null;

    // The filter that was installed on the response before this one; receives our output.
    private readonly Stream FilterStream = null;

    // Content types whose bodies are rewritten; everything else passes through untouched.
    private readonly string[] ContentTypes = new string[1] {
        "text/html"
    };

    // Group 1 captures a whole <pre>...</pre> element; replacing with "$1" keeps that element
    // verbatim while the other alternatives (whitespace runs, tabs, line breaks) are removed.
    private static readonly Regex WhitespaceRegex = new Regex("(<pre>[^<>]*(((?<Open><)[^<>]*)+((?<Close-Open>>)[^<>]*)+)*(?(Open)(?!))</pre>)|\\s\\s+|[\\t\\n\\r]", RegexOptions.Singleline | RegexOptions.Compiled);

    // Matches HTML comments, including ones that span multiple lines.
    private static readonly Regex CommentsRegex = new Regex("<!--.*?-->", RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>
    /// Captures the context and its response's current filter so output can be forwarded to it.
    /// </summary>
    /// <param name="HttpContext">HTTP context whose response is being filtered.</param>
    public WhitespaceStream(
        HttpContextBase HttpContext)
    {
        this.HttpContext = HttpContext;
        this.FilterStream = HttpContext.Response.Filter;
    }

    /// <summary>
    /// Filters the requested slice of <paramref name="Buffer"/> and writes the result to the
    /// previously installed response filter.
    /// </summary>
    /// <param name="Buffer">Buffer containing the bytes to write.</param>
    /// <param name="Offset">Index of the first byte to process.</param>
    /// <param name="Count">Number of bytes to process.</param>
    public override void Write(
        byte[] Buffer,
        int Offset,
        int Count)
    {
        bool isHtml = this.ContentTypes.Any(
            ct =>
                (ct == this.HttpContext.Response.ContentType));

        if (!isHtml)
        {
            // Forward the raw bytes: decoding and re-encoding non-HTML (possibly binary)
            // content as UTF-8 would corrupt it.
            this.FilterStream.Write(Buffer, Offset, Count);
            return;
        }

        this.HttpContext.Response.ContentEncoding = Encoding.UTF8;

        // Decode only the slice the caller asked for, not the whole buffer.
        // NOTE(review): a multi-byte character split across two Write calls is decoded
        // separately in each call — confirm this is acceptable for the expected output.
        string html = Encoding.UTF8.GetString(Buffer, Offset, Count);

        html = WhitespaceRegex.Replace(html, "$1");
        html = CommentsRegex.Replace(html, string.Empty);

        // The encoded output is a fresh array, so it is written from index 0;
        // reusing the caller's Offset here would run past the end of the array.
        byte[] output = Encoding.UTF8.GetBytes(html);
        this.FilterStream.Write(output, 0, output.Length);
    }
}

答案 2 :(得分:1)

在HTML中,多个空格被视为一个空格,您可以在响应中使用正则表达式:

/\s+/ /g

将任何连续的空格转换为单个空格。

请注意,虽然这会减小未压缩页面的大小,但如果你已经对页面启用了压缩(例如 gzip),那么节省的空间就不会那么大。

警告:这可能会破坏内联 JavaScript,因为 JS 会将换行符视为语句分隔符(相当于 ;)。如果你的 JS 使用 ; 来分隔语句(大多数 JS 都是这样),应该不会有问题。

此外,<pre> 块中的代码示例也会受到影响,因为其中的空白会按原样显示:

some   code   here {
  more          code }

变为

some code here { more code }