Windows应用程序:从doc文件中提取姓名、电子邮件ID和联系电话

时间:2011-03-01 12:21:07

标签: c# file window doc

我想从Word文档文件中提取姓名、联系电话和电子邮件ID,请问应该如何提取?希望有人能帮帮我,我的代码如下:

/// <summary>
/// Opens a Word document through Word interop, walks its paragraphs and
/// collects every paragraph token that validates as an e-mail address.
/// </summary>
/// <param name="file">Document path; passed by reference to <c>Documents.Open</c>.</param>
public void Opendoc(object file)
        {
            object nobj = System.Reflection.Missing.Value;
            Microsoft.Office.Interop.Word.Application wpp = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document doc = null;

            // NOTE(review): the collected addresses are local to this method and are
            // not returned or stored anywhere; expose them if callers need the result.
            List<string> emailCollection = new List<string>();

            try
            {
                doc = wpp.Documents.Open(ref file, ref nobj, ref nobj, ref nobj,
                            ref nobj, ref nobj, ref nobj, ref nobj, ref nobj, ref nobj,
                            ref nobj, ref nobj, ref nobj, ref nobj, ref nobj, ref nobj);

                foreach (Microsoft.Office.Interop.Word.Paragraph objParagraph in doc.Paragraphs)
                {
                    // Read the paragraph currently being enumerated (the previous code
                    // re-read the first paragraph on every iteration).
                    string paragraphText = objParagraph.Range.Text;

                    // Paragraphs without an '@' cannot contain an address; skip them
                    // before handing the text to the extractor.
                    if (string.IsNullOrEmpty(paragraphText) || paragraphText.IndexOf('@') < 0)
                    {
                        continue;
                    }

                    string emailaddress = EmailExtractot(paragraphText).TrimEnd();
                    if (IsEmail(emailaddress))
                    {
                        emailCollection.Add(emailaddress);
                    }
                }
            }
            finally
            {
                // Close the document and quit Word even when extraction fails, so no
                // Word process is left running. Exceptions still propagate to the
                // caller with their original stack trace.
                if (doc != null)
                {
                    doc.Close(ref nobj, ref nobj, ref nobj);
                }

                wpp.Quit(ref nobj, ref nobj, ref nobj);
            }
        }

 /// <summary>
 /// Anchored regular expression for a whole-string e-mail address: a local part,
 /// an '@', then either four dot-separated numeric groups or one or more
 /// dot-terminated labels followed by a final label of 2 to 4 letters.
 /// </summary>
 /// <remarks>
 /// Each verbatim literal must stay on a single line. A line break inside an
 /// <c>@"..."</c> literal becomes part of the pattern and has to be matched
 /// literally by the input.
 /// </remarks>
 public const string MatchEmailPattern = @"^(([\w-]+\.)+[\w-]+|([a-zA-Z]{1}|[\w-]{2,}))@"
     + @"((([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\."
     + @"([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])){1}|"
     + @"([a-zA-Z]+[\w-]+\.)+[a-zA-Z]{2,4})$";
  /// <summary>
  /// Tells whether <paramref name="email"/> matches <c>MatchEmailPattern</c>.
  /// </summary>
  /// <param name="email">Candidate address; may be null.</param>
  /// <returns>False for null, otherwise the result of the regex match.</returns>
  public static bool IsEmail(string email)
        {
            // Null is rejected up front so Regex.IsMatch is never given a null input.
            if (email == null)
            {
                return false;
            }

            bool matches = Regex.IsMatch(email, MatchEmailPattern);
            return matches;
        }

 /// <summary>
 /// Returns the whitespace-delimited token that contains the first '@' in
 /// <paramref name="orginal"/>.
 /// </summary>
 /// <param name="orginal">Text to search; may be null or empty.</param>
 /// <returns>
 /// The token around the first '@', without surrounding whitespace, or an empty
 /// string when the input is null, empty or contains no '@'.
 /// </returns>
 private static string EmailExtractot(string orginal)
        {
            if (string.IsNullOrEmpty(orginal))
            {
                return string.Empty;
            }

            // IndexOf('@', ' ') resolved to IndexOf(char, int): the space was converted
            // to 32 and used as the start index, so the search skipped the first 32
            // characters and threw on shorter strings. Search from the beginning.
            int index = orginal.IndexOf('@');
            if (index < 0)
            {
                return string.Empty;
            }

            char[] separators = { ' ', '\t', '\r', '\n', '\v' };

            // Token starts just after the last separator before '@' (or at 0).
            int start = orginal.LastIndexOfAny(separators, index) + 1;

            // Token ends at the first separator after '@', or at the end of the text
            // when the address is the last thing in the string.
            int end = orginal.IndexOfAny(separators, index);
            if (end < 0)
            {
                end = orginal.Length;
            }

            return orginal.Substring(start, end - start);
        }

但此代码无效

1 个答案:

答案 0 :(得分:0)

    /// <summary>
    /// Click handler: runs <c>GetEmailAddress</c> on a fixed sample document and
    /// shows the returned text in a message box.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // Prerequisites for this snippet:
        //   assembly reference: Microsoft.Office.Interop.Word
        //   using System;
        //   using System.IO;
        //   using System.Windows.Forms;
        //   using Microsoft.Office.Interop.Word;

        string extracted = GetEmailAddress("C:\\Sample.docx");
        MessageBox.Show(extracted);
    }

    /// <summary>
    /// Opens a Word document and, for every word that contains '@', appends the
    /// previous word, the word itself and the next word followed by "; ".
    /// </summary>
    /// <param name="file">Path of the document to open.</param>
    /// <returns>The concatenated matches, or an empty string when none are found.</returns>
    private string GetEmailAddress(string file)
    {
        System.Text.StringBuilder emails = new System.Text.StringBuilder();

        // Open a doc file.
        Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
        Document document = null;

        try
        {
            document = application.Documents.Open(file);

            // Loop through all words in the document (indexed 1..Count).
            int count = document.Words.Count;
            for (int i = 1; i <= count; i++)
            {
                // Read the word once; every access is a separate interop call.
                string text = document.Words[i].Text;
                if (text == null || !text.Contains("@"))
                {
                    continue;
                }

                // The first word has no predecessor and the last word has no
                // successor; indexing i - 1 or i + 1 there is out of range.
                string previous = i > 1 ? document.Words[i - 1].Text : "";
                string next = i < count ? document.Words[i + 1].Text : "";

                // NOTE(review): if Word splits an address at punctuation, only the
                // tokens adjacent to '@' are captured here - confirm on real input.
                emails.Append(previous).Append(text).Append(next).Append("; ");
            }
        }
        finally
        {
            // Close the document and Word even when reading fails, so no Word
            // process is left running in the background.
            if (document != null)
            {
                document.Close(Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges);
            }

            application.Quit();
        }

        return emails.ToString();
    }
}