将HTML转换为字符串

时间:2011-04-05 20:30:18

标签: html asp.net-mvc c#-4.0

我有一个使用 StringWriter 的函数,它捕获 HTML 并以字符串形式返回。例如:

\r\n\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" >\r\n<head>\r\n <link rel=\"Stylesheet\" href=\"../../Content/style.css\" type=\"text/css\" />\r\n <title>Cover Page</title>\r\n <style type=\"text/css\">\r\n html, body\r\n {\r\n\t font-family: Arial, Helvetica, sans-serif;\r\n\t font-size: 13pt;\r\n\t padding: 0px;\r\n\t margin: 0px;\r\n\t background-color: #FFFFFF;\r\n\t color: black;\r\n\t width: 680px;\r\n }\r\n </style>\r\n</head>\r\n<body>\r\n <div>\r\n Ssotest Ssotest, \r\n </div> \r\n</body>\r\n</html>\r\n"

当我将它传递给 PDF 生成工具时,它会抛出一个错误。但是当我从 VS2010 的 Locals 窗口复制 StringWriter 的输出(与上面相同的 HTML 字符串),并像下面这样硬编码时:

 string test ="\r\n\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" >\r\n<head>\r\n    <link rel=\"Stylesheet\" href=\"../../Content/style.css\" type=\"text/css\" />\r\n    <title>Cover Page</title>\r\n    <style type=\"text/css\">\r\n        html, body\r\n        {\r\n\t        font-family: Arial, Helvetica, sans-serif;\r\n\t        font-size: 13pt;\r\n\t        padding: 0px;\r\n\t        margin: 0px;\r\n\t        background-color: #FFFFFF;\r\n\t        color: black;\r\n\t        width: 680px;\r\n        }\r\n    </style>\r\n</head>\r\n<body>\r\n    <div>\r\n        Ssotest Ssotest, \r\n    </div> \r\n</body>\r\n</html>\r\n"

再把它传递给工具,就能正常工作。在这两种情况下,字符串都是相同的。我想知道是什么造成了差异?当我复制文本并硬编码时,是否有内容被转换了?有什么建议吗?

更新:我尝试用下面这段代码来格式化(转义)字符串:

 /// <summary>
 /// Renders strings and characters the way they would be written inside C# source
 /// literals, replacing control characters, backslashes and quotes with escape sequences.
 /// </summary>
 public class ReplaceString
        {
            // Maps each character that needs escaping to its C# escape sequence.
            static readonly IDictionary<string, string> m_replaceDict
                = new Dictionary<string, string>
                {
                    { "\a", @"\a" },
                    { "\b", @"\b" },
                    { "\f", @"\f" },
                    { "\n", @"\n" },
                    { "\r", @"\r" },
                    { "\t", @"\t" },
                    { "\v", @"\v" },
                    { "\\", @"\\" },
                    { "\0", @"\0" },
                    { "\"", "\\\"" },
                };

            // Character class matching every key of m_replaceDict. Inside a character
            // class \b means backspace; \x00 is NUL, which the dictionary has an entry
            // for and therefore must be matched here as well.
            const string ms_regexEscapes = @"[\a\b\f\n\r\t\v\x00\\""]";

            /// <summary>
            /// Returns <paramref name="i_string"/> with every escapable character replaced
            /// by its escape sequence. No surrounding quotes are added.
            /// </summary>
            /// <param name="i_string">Text to escape.</param>
            /// <returns>The escaped text.</returns>
            public static string StringLiteral(string i_string)
            {
                return Regex.Replace(i_string, ms_regexEscapes, match);
            }

            /// <summary>
            /// Returns <paramref name="c"/> as a single-quoted C# character literal.
            /// </summary>
            /// <param name="c">Character to render.</param>
            /// <returns>The literal text, including the surrounding single quotes.</returns>
            public static string CharLiteral(char c)
            {
                if (c == '\'')
                {
                    return @"'\''";
                }

                // A double quote is valid unescaped inside a char literal, so it is
                // deliberately excluded from the dictionary lookup.
                string escaped;
                if (c != '"' && m_replaceDict.TryGetValue(c.ToString(), out escaped))
                {
                    return "'" + escaped + "'";
                }

                return string.Format("'{0}'", c);
            }

            // Match evaluator: looks up the replacement for the single matched character.
            private static string match(Match m)
            {
                string replacement;
                if (m_replaceDict.TryGetValue(m.ToString(), out replacement))
                {
                    return replacement;
                }

                // The regex matched a character the dictionary has no entry for.
                throw new NotSupportedException();
            }

            static void Main(string[] args)
            {
                string s = "here's a \"\n\tstring\" to test";
                Console.WriteLine(ReplaceString.StringLiteral(s));
                Console.WriteLine(ReplaceString.CharLiteral('c'));
                Console.WriteLine(ReplaceString.CharLiteral('\''));
            }
        }

但是返回的字符串会像下面这样:
\\r\\n\\r\\n<!DOCTYPE html PUBLIC \\\"-//W3C//DTD XHTML 1.0 Transitional//EN\\\" \\\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\\\">\\r\\n\\r\\n<html xmlns=\\\"http://www.w3.org/1999/xhtml\\\" >\\r\\n<head>\\r\\n    <link rel=\\\"Stylesheet\\\...."

这是说得通的。下面是我正在使用的 PDF 生成器的代码:

string test="\r\n\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" >\r\n<head>\r\n <link rel=\"Stylesheet\" href=\"../../Content/style.css\" type=\"text/css\" />\r\n <title>Cover Page</title>\r\n <style type=\"text/css\">\r\n html, body\r\n {\r\n\t font-family: Arial, Helvetica, sans-serif;\r\n\t font-size: 13pt;\r\n\t padding: 0px;\r\n\t margin: 0px;\r\n\t background-color: #FFFFFF;\r\n\t color: black;\r\n\t width: 680px;\r\n }\r\n </style>\r\n</head>\r\n<body>\r\n <div>\r\n Ssotest Ssotest, \r\n </div> \r\n</body>\r\n</html>\r\n"

           // Builds a one-page PDF from the HTML held in `test` and returns it to the client
           // as an "application/pdf" FileStreamResult.
           // NOTE(review): Document, PdfPage, HtmlToPdfElement and FileStreamResponseContext are
           // declared outside this snippet — confirm their exact semantics against that library.
           FileStreamResponseContext response = new FileStreamResponseContext();
 Document doc = new Document();
            // Document metadata and viewer/security settings.
            doc.DocumentInformation.CreationDate = DateTime.Now;
            doc.DocumentInformation.Title = "Test Plan";
            doc.DocumentInformation.Subject = "Test Plan";
            doc.CompressionLevel = CompressionLevel.NormalCompression;
            doc.Margins = new Margins(0, 0, 0, 0);
            doc.Security.CanPrint = true;
            doc.ViewerPreferences.HideToolbar = false;
            doc.ViewerPreferences.FitWindow = false;

// Base URL always points at localhost; the port suffix is omitted only when the request
// arrived on port 80. Presumably used by the converter to resolve relative URLs in the
// HTML (e.g. the stylesheet href) — confirm.
string baseUrl = String.Format("http://localhost{0}/", Request.Url.Port == 80?"":":" + Request.Url.Port.ToString());

// Single Letter-size portrait page with zero margins.
PdfPage docTestPlan = doc.AddPage(PageSize.Letter, new Margins(0, 0, 0, 0), PageOrientation.Portrait);
// `test` is the HTML string captured by the string writer; it is handed to the
// HTML-to-PDF element together with the base URL.

   HtmlToPdfElement htmlToPdf = new HtmlToPdfElement(test, baseUrl);
            htmlToPdf.FitWidth = false;
            docTestPlan.AddElement(htmlToPdf);



            /******************************************
             * Save the document into a memory stream for return.
             * The stream is rewound to position 0 after saving so that
             * whoever consumes it reads from the start. */
            response.FileDataStream = new MemoryStream();
            doc.Save(response.FileDataStream);
            doc.Close();
            response.FileDataStream.Position = 0;

            return new FileStreamResult(response.FileDataStream, "application/pdf");

4 个答案:

答案 0 :(得分:0)

不确定您使用的是哪个软件,但对我来说,看起来您正在将字符串放入某些程序化上下文中,而这些内容正在被转义。

因此,当工具获得原始复制输入时,它会看到:\"

但是当它从编程上下文中获取时,它只会看到:"

答案 1 :(得分:0)

我不确定您是否正确地将值传递给PDF API。你能用这个代码更新吗?

编辑:你不应该返回文件本身吗?那样返回的内容会带有头信息(header),而不只是一个流。

HTML STRING示例:

// Execute the page server-side, capturing its rendered output in an in-memory
// writer, then read the captured markup back as a string.
var sw = new StringWriter();
Server.Execute("PageToConvert.aspx", sw);
string htmlCodeToConvert = sw.ToString();

答案 2 :(得分:0)

经过长时间的排查,我找到了原因。错误的起因是:当字符串来自程序上下文(StringWriter 的输出)时,内容会被继续追加到 System.Web.HttpResponseBase 的 Response 对象中;而直接传入硬编码的字符串时,则不会再次影响 Response 对象。所以最终的解决方案是添加一行代码 Response.Clear();,清除此前写入 Response 的所有内容。现在它工作正常了。谢谢大家的建议!

答案 3 :(得分:0)

/// <summary>
/// Click handler that converts a fixed block of HTML markup into a PDF file on the
/// current user's desktop and then sends that file to the browser via ShowPdf.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void btnExport_Click(object sender, EventArgs e)
{
    // The markup is read through an XmlTextReader below, so it must be well-formed
    // XML: matching tag case, quoted attribute values, a single root element.
    // To export a rendered control instead of static markup, render it into a
    // StringWriter through an HtmlTextWriter and use that writer's text here.
    string htmlDisplayText = @"
<html>
<body bgcolor=""red"">
<h4>亲爱的bishnu2</h4>

你的地址pdp是

这个模式的早期版本在PLoP上进行了研讨会经过多次内部研讨会和更新,后来的版本

在PLoP上进行了研讨会模式是现在已经足够成熟,我根据AG Communication

系统的模式教授课程。
版权所有©1999 AG Communication Systems Corporation

</body></html>
";

    // Build the output path once; it is needed both for writing and for sending.
    string pdfPath = System.IO.Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.Desktop),
        "AmitJain.pdf");

    Document Doc = new Document();
    PdfWriter.GetInstance(Doc, new FileStream(pdfPath, FileMode.Create));
    Doc.Open();

    // Dispose the readers as soon as parsing is done.
    using (var xmlReader = new System.Xml.XmlTextReader(new StringReader(htmlDisplayText)))
    {
        HtmlParser.Parse(Doc, xmlReader);
    }

    // NOTE(review): closing the document presumably also finalises and closes the
    // FileStream handed to PdfWriter — confirm against the PDF library's docs.
    Doc.Close();

    ShowPdf(pdfPath);
}

/// <summary>
/// Sends the PDF file at <paramref name="strS"/> to the browser as a download
/// and ends the current response.
/// </summary>
/// <param name="strS">Full path of the PDF file on the server.</param>
private void ShowPdf(string strS)
{
    // Discard anything already buffered so only the PDF bytes are sent.
    Response.ClearContent();
    Response.ClearHeaders();
    Response.ContentType = "application/pdf";

    // Advertise only the file name to the client, never the server-side path.
    Response.AddHeader(
        "Content-Disposition",
        "attachment; filename=\"" + System.IO.Path.GetFileName(strS) + "\"");
    Response.TransmitFile(strS);

    // Flush before End: End stops execution of the page, so any statement
    // placed after it would never run.
    Response.Flush();
    Response.End();
}