在C#中将MS Word表格转换为HTML

时间:2014-05-06 05:25:20

标签: c# html ms-word html-table

我正在用C#编写Word Addin,将所有格式替换为xml标签, 现在我希望将word中的表转换为带有标准标签的html, 表的行数和列数可能不同, 我的意思是表包含合并的单元格或列

例如下面这样的表:

-------------------------
|  1  |  2  |  3  |  4  |
|     -------------------
|     |  5  |  6  |  7  |
|     -------------------
|     |        8        |
|     -------------------
|     |  9  | 10  | 11  |
|------------------------
| 12  | 13  | 14  | 15  |
-------------------------

单元格1是一列中的四行合并 而单元格8是一行中三列的合并

我该如何转换它?

3 个答案:

答案 0 :(得分:1)

前一段时间我们遇到过类似的项目,希望下面的代码能为您提供一个开始。 HTML部分

<%-- File picker for the Word document plus the button that triggers the server-side conversion. --%>
<div>
    <input id="File1" type="file" runat="server"/>
    <asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />
</div>

C#部分     using System;
    using System.Data;
    using System.Configuration;
    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.Security;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Web.UI.WebControls.WebParts;
    using System.Web.UI.HtmlControls;
    using System.IO;

/// <summary>
/// Page load handler. Performs no work.
/// </summary>
/// <param name="sender">Source of the event.</param>
/// <param name="e">Event data.</param>
protected void Page_Load(object sender, EventArgs e)
{
}

        /// <summary>
        /// Handles the Convert button click: passes the posted file to
        /// <c>wordToHtml</c> and writes the outcome to the response.
        /// </summary>
        /// <param name="sender">Source of the event.</param>
        /// <param name="e">Event data.</param>
        protected void btnConvert_Click(object sender, EventArgs e)
        {
            // No try/catch here: catching only to "throw ex;" would reset the stack
            // trace, so any exception is simply allowed to propagate unchanged.
            string result = wordToHtml(File1);

            // wordToHtml reports failures through sentinel strings, so the success
            // message must only be written when neither sentinel came back.
            if (result == "0")
            {
                Response.Write("Upload failed!");
            }
            else if (result == "1")
            {
                Response.Write("Only .doc and .docx files can be converted!");
            }
            else
            {
                Response.Write("Convert successfully!");
            }
        }

        /// <summary>
        /// Uploads the posted Word document to the server and saves it as filtered
        /// HTML in the application's "html" folder by automating Word.
        /// </summary>
        /// <param name="wordFilePath">File input control holding the posted Word document.</param>
        /// <returns>
        /// "0" or "1" when <c>uploadWord</c> returns that failure code; otherwise
        /// "/" followed by the generated file name and ".html".
        /// </returns>
        public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
        {
            // Upload first so that Word is never started for a request that cannot be
            // converted; previously the early returns left a Word instance running.
            string filePath = uploadWord(wordFilePath);
            if (filePath == "0" || filePath == "1")
            {
                return filePath;
            }

            // Read the clock once and use fixed-width fields so the generated name is
            // unambiguous (unpadded parts made e.g. 1/11 and 11/1 indistinguishable).
            DateTime now = DateTime.Now;
            string filename = now.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

            // Resolve the output folder once, relative to the application root, so the
            // folder that is created is the same folder the file is saved into.
            string htmlDirectory = Server.MapPath("~/html");
            Directory.CreateDirectory(htmlDirectory); // does nothing if it already exists

            object fileName = filePath;
            object saveFileName = Path.Combine(htmlDirectory, filename + ".html");

            Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
            Type wordType = word.GetType();
            try
            {
                Microsoft.Office.Interop.Word.Documents docs = word.Documents;
                Type docsType = docs.GetType();

                // open doc file (arguments: file name, ConfirmConversions, ReadOnly)
                Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
                    System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
                Type docType = doc.GetType();
                try
                {
                    // convert and save; other WdSaveFormat members (wdFormatHTML,
                    // wdFormatRTF, wdFormatText, ...) select a different output format
                    docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
                }
                finally
                {
                    // close document even if saving failed
                    docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { null, null, null });
                }
            }
            finally
            {
                // exit Word on every path so no Word instance is left running on the server
                wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
            }

            // NOTE(review): the file is saved under the "html" folder but the returned
            // path omits that folder; kept as-is for existing callers - confirm intent.
            return ("/" + filename + ".html");
        }


        /// <summary>
        /// Saves the posted file into the application's "wordTmp" folder when it has
        /// a Word extension.
        /// </summary>
        /// <param name="uploadFiles">File input control holding the posted file.</param>
        /// <returns>
        /// The physical path of the saved file; "1" if the extension is not .doc or
        /// .docx; "0" if nothing was posted or saving failed.
        /// </returns>
        public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
        {
            if (uploadFiles == null || uploadFiles.PostedFile == null)
            {
                return "0";
            }

            try
            {
                string fileName = uploadFiles.PostedFile.FileName;
                if (string.IsNullOrEmpty(fileName))
                {
                    return "0";
                }

                // Path.GetExtension returns "" for a name without a dot, whereas
                // Substring(LastIndexOf(".")) threw for such names.
                string extendName = Path.GetExtension(fileName);

                // check if is word format; ignore case so ".DOC" / ".Docx" are accepted
                bool isWord = string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(extendName, ".docx", StringComparison.OrdinalIgnoreCase);
                if (!isWord)
                {
                    return "1";
                }

                // Resolve the folder once, relative to the application root, so the
                // folder that is created is the same folder the file is saved into.
                string uploadDirectory = Server.MapPath("~/wordTmp");
                Directory.CreateDirectory(uploadDirectory); // does nothing if it already exists

                // A GUID keeps concurrent uploads from overwriting each other; the old
                // day-of-year + content-length name collided for equal-sized files.
                string savedPath = Path.Combine(uploadDirectory, Guid.NewGuid().ToString("N") + extendName);
                uploadFiles.PostedFile.SaveAs(savedPath);
                return savedPath;
            }
            catch
            {
                // Callers rely on the "0" sentinel instead of an exception for any
                // failure while saving the upload.
                return "0";
            }
        }

答案 1 :(得分:0)

这是一个简单的技巧:只需复制Word表格,并粘贴到Dreamweaver -> 设计模式中。 切换到代码模式后,您就能看到所有的html标签,只需复制这些代码并粘贴到C#文件中即可。

享受。

答案 2 :(得分:0)

感谢回复, 我找到了一种将Word表格转换为html的方法。
我写这段代码:

/// <summary>
/// Wraps every table of the active document in literal TABLE/TR/TD/P tags and
/// then converts the table to plain text. Any error is shown in a message box.
/// </summary>
private static void ConvertTableToHTML()
{
    try
    {
        var tables = Common.WordApplication.ActiveDocument.Tables;

        // ConvertToText removes the table from the document, so walk the collection
        // backwards by index instead of enumerating it while it is being modified.
        for (int t = tables.Count; t >= 1; t--)
        {
            Table tb = tables[t];
            int columnsCount = tb.Columns.Count;

            for (int r = 1; r <= tb.Rows.Count; r++)
            {
                for (int c = 1; c <= columnsCount; c++)
                {
                    try
                    {
                        Cell cell = tb.Cell(r, c);
                        foreach (Paragraph paragraph in cell.Range.Paragraphs)
                        {
                            Tagging(paragraph.Range, "P");
                        }
                        Tagging(cell.Range, "TD");
                    }
                    catch (Exception e)
                    {
                        // Most likely a part of a merged cell, so skip over;
                        // anything else is a real failure and is rethrown.
                        if (!IsMissingCollectionMember(e))
                        {
                            throw;
                        }
                    }
                }

                try
                {
                    Row row = tb.Rows[r];
                    Tagging(row.Range, "TR");
                }
                catch (Exception)
                {
                    // The row could not be tagged as a whole, so put the opening tag
                    // before the first cell that exists in this row...
                    for (int c = 1; c <= columnsCount; c++)
                    {
                        try
                        {
                            tb.Cell(r, c).Range.InsertBefore("<TR>");
                            break;
                        }
                        catch (Exception)
                        {
                            // Best effort: this position has no cell, try the next one.
                        }
                    }

                    // ...and the closing tag after the last cell that exists in it.
                    for (int c = columnsCount; c >= 1; c--)
                    {
                        try
                        {
                            tb.Cell(r, c).Range.InsertAfter("</TR>");
                            break;
                        }
                        catch (Exception)
                        {
                            // Best effort: this position has no cell, try the previous one.
                        }
                    }
                }
            }

            Common.Tagging2(tb.Range, "Table");

            object separator = "";
            object nestedTable = true;
            tb.ConvertToText(separator, nestedTable);
        }
    }
    catch (Exception ex) { MessageBox.Show(ex.Message); }
}

/// <summary>
/// Tells whether <paramref name="error"/> is Word's "requested member of the
/// collection does not exist" failure.
/// </summary>
private static bool IsMissingCollectionMember(Exception error)
{
    // 0x800A1735 is Word run-time error 5941; checking the code keeps the detection
    // working when Office returns a localized (non-English) message.
    var comError = error as System.Runtime.InteropServices.COMException;
    if (comError != null && comError.ErrorCode == unchecked((int)0x800A1735))
    {
        return true;
    }

    // Fall back to the message text for exceptions that carry no COM error code.
    return error.Message.Contains("The requested member of the collection does not exist.");
}


/// <summary>
/// Surrounds <paramref name="range"/> with a literal opening and closing tag.
/// </summary>
/// <param name="range">Range to insert the tag text before and after.</param>
/// <param name="TagName">Tag name without angle brackets, e.g. "TD".</param>
/// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
public static void Tagging(Range range, string TagName)
{
    if (range == null)
    {
        throw new ArgumentNullException("range");
    }

    // Failures are allowed to propagate unchanged: re-wrapping them in a new
    // Exception discarded the original exception type and stack trace.
    range.InsertBefore("<" + TagName + ">");
    range.InsertAfter("</" + TagName + ">");
}