读取 Word 文档中各标题下的所有段落,并在 C# Windows 程序中将其导出到 Excel

时间:2013-11-14 06:27:40

标签: c# ms-word office-interop

以下代码会读取 Word 文档中每个标题下的段落,但只能读到第一个回车符(段落结束符)为止;回车之后、仍属于同一标题下的其余段落则读取不到。有没有办法把同一标题下的所有段落都读取出来?请告诉我这是否可行。

// Walks every paragraph of Doc. For each paragraph whose style name contains
// "Heading", the text of the paragraph that immediately follows it is split on '.'
// and the pieces are stored in paracount (declared outside this snippet).
// NOTE(review): the snippet is truncated in the original post - the closing braces of
// the heading `if` and of the outer `foreach` are missing.
foreach (Microsoft.Office.Interop.Word.Paragraph paragraph in Doc.Paragraphs)
{
    // get_Style() returns object; the `as` cast yields null if it is not a Style,
    // in which case the NameLocal access below would throw.
    Microsoft.Office.Interop.Word.Style style = 
        paragraph.get_Style() as Microsoft.Office.Interop.Word.Style;
    string styleName = style.NameLocal;
    // Text of the current paragraph, split on '.'.
    string text = paragraph.Range.Text.Trim();
    string[] words = text.Split('.');

    // Any name containing "Heading1"/"Heading2"/"Heading3" also contains "Heading",
    // so the first test alone decides the outcome of this condition.
    if (styleName.Contains("Heading") 
        || styleName.Contains("Heading1") 
        || styleName.Contains("Heading2") 
        || styleName.Contains("Heading3"))
    {
        // Runs once per '.'-separated piece of the heading text.
        foreach (string word in words)
        {
            // Only the single paragraph directly after the heading is examined; later
            // paragraphs under the same heading are never visited here, which is the
            // limitation the question asks about.
            if(paragraph.Next() !=null)
            {
                // Counter appended to the key; restarts at 1 for every heading piece.
                int j = 1;
                string data = paragraph.Next().Range.Text.ToString().Trim();
                // string h = paragraph.Next().Range.Tables.ToString().Trim();
                string[] dataf = data.Split('.');
                foreach (string dat in dataf)
                {
                    // Key = heading piece + counter, value = one piece of the next paragraph.
                    // NOTE(review): presumably paracount is a dictionary-like collection;
                    // if so, a repeated key would make Add throw - confirm against its declaration.
                    paracount.Add(word + j, dat);
                    j++;
                }
            }
        }

1 个答案:

答案 0 :(得分:0)

 // Returns each header together with the text below the corresponding header.
   /// <summary>
   /// Collects, for every "Heading 1" paragraph of a Word document, the heading text and
   /// the plain text of all paragraphs that follow it up to the next "Heading 1".
   /// </summary>
   /// <param name="docname">Document name handed unchanged to <c>ReadMsWord</c>.</param>
   /// <returns>
   /// One (header, text below header) tuple per "Heading 1", in document order.
   /// The list is empty when the document has no such heading, and holds whatever was
   /// collected so far when an error occurs (the error is shown in a message box).
   /// </returns>
   /// <remarks>
   /// Paragraphs that appear before the first "Heading 1" are ignored.
   /// The style is matched through <c>NameLocal</c>, so the literal "Heading 1" only
   /// matches where Word reports the style under that name.
   /// The document is closed without saving and its Word instance is quit before returning.
   /// </remarks>
   private List<Tuple<string, string>> GetPlainTextByHeaderFromWordDoc(string docname)
   {
       List<Tuple<string, string>> sections = new List<Tuple<string, string>>();

       try
       {
           Document doc = ReadMsWord(docname);
           if (doc == null)
           {
               return sections;
           }

           try
           {
               // Single pass: remember the heading we are currently under and
               // accumulate every following paragraph until the next heading.
               string currentHeader = null;
               System.Text.StringBuilder body = new System.Text.StringBuilder();

               foreach (Paragraph paragraph in doc.Paragraphs)
               {
                   // get_Style() returns object; it is not guaranteed to be a Style.
                   Style style = paragraph.get_Style() as Style;
                   bool isHeading = style != null && style.NameLocal == "Heading 1";

                   if (isHeading)
                   {
                       // A new section starts: flush the previous one first.
                       if (currentHeader != null)
                       {
                           sections.Add(BuildSection(currentHeader, body.ToString()));
                       }

                       currentHeader = paragraph.Range.Text;
                       body.Clear();
                   }
                   else if (currentHeader != null)
                   {
                       body.Append(paragraph.Range.Text);
                   }
               }

               // Flush the last section, which has no following heading to trigger it.
               if (currentHeader != null)
               {
                   sections.Add(BuildSection(currentHeader, body.ToString()));
               }
           }
           finally
           {
               // Always release the document and the Word process that hosts it;
               // otherwise WINWORD.EXE keeps running and keeps the file locked.
               Microsoft.Office.Interop.Word.Application app = doc.Application;
               object dontSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
               object missing = Type.Missing;

               doc.Close(ref dontSave, ref missing, ref missing);
               if (app != null)
               {
                   app.Quit(ref dontSave, ref missing, ref missing);
               }
           }
       }
       catch (Exception ex)
       {
           // ToString() carries the message as well as the stack trace.
           MessageBox.Show(ex.ToString());
       }

       return sections;
   }

   /// <summary>
   /// Normalizes one raw heading and its raw body text into a (header, text) tuple.
   /// </summary>
   /// <param name="rawHeader">Heading paragraph text as returned by Word.</param>
   /// <param name="rawBody">Concatenated text of the paragraphs below the heading.</param>
   /// <returns>The cleaned header and body text.</returns>
   private static Tuple<string, string> BuildSection(string rawHeader, string rawBody)
   {
       // Keep only ASCII letters and whitespace in the header (drops numbering, punctuation).
       string header = Regex.Replace(rawHeader.Trim(), "[^a-zA-Z\\s]", string.Empty).Trim();

       // "\a" is removed first so it cannot sit between two whitespace runs; then every
       // whitespace run (including the "\r" paragraph marks) collapses to one space so
       // that words stay separated.
       string belowText = rawBody.Replace("\a", string.Empty);
       belowText = Regex.Replace(belowText, @"\s+", " ").Trim();

       return new Tuple<string, string>(header, belowText);
   }

   /// <summary>
   /// Starts a new Word instance, opens the requested document read-only and returns it.
   /// </summary>
   /// <param name="docName">
   /// File name inside the repository folder; ".docx" is appended when the name has no
   /// extension. A rooted path is used as-is, because Path.Combine returns its second
   /// argument when that argument is rooted.
   /// </param>
   /// <returns>
   /// The opened document. The caller owns it and must close it and quit its
   /// <c>Application</c> when done.
   /// </returns>
   /// <exception cref="ArgumentException"><paramref name="docName"/> is null or blank.</exception>
   /// <remarks>
   /// Failures while opening are rethrown after the Word instance created here has been
   /// shut down, instead of being swallowed.
   /// </remarks>
   private Document ReadMsWord(string docName)
   {
       if (string.IsNullOrWhiteSpace(docName))
       {
           throw new ArgumentException("A document name is required.", "docName");
       }

       // Build the path from the parameter instead of the literal text "docName.docx".
       string fileName = System.IO.Path.HasExtension(docName) ? docName : docName + ".docx";
       object path = System.IO.Path.Combine(@"C:\Kaustubh_Tupe\WordRepository", fileName);

       // Placeholder for the optional COM arguments we do not set.
       object miss = System.Reflection.Missing.Value;
       // The document is only read, so open it read-only.
       object readOnly = true;

       Microsoft.Office.Interop.Word.Application word = null;
       try
       {
           word = new Microsoft.Office.Interop.Word.Application();

           // No Selection.WholeStory()/Copy() here: nothing reads the clipboard
           // afterwards, and copying would overwrite the user's clipboard contents.
           return word.Documents.Open(ref path, ref miss, ref readOnly,
               ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss,
               ref miss, ref miss, ref miss, ref miss, ref miss);
       }
       catch (Exception)
       {
           // Do not leave an orphaned WINWORD.EXE behind when opening fails.
           if (word != null)
           {
               try
               {
                   object dontSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
                   word.Quit(ref dontSave, ref miss, ref miss);
               }
               catch (System.Runtime.InteropServices.COMException)
               {
                   // Best-effort cleanup; the original failure below is the one that matters.
               }
           }

           throw;
       }
   }