如何加速Word Interop处理?

时间:2016-04-09 15:49:11

标签: c#

我是C#的新手,写了一个相当笨重的代码。我一直在网上做很多课程,很多人说有几种方法可以解决问题。现在我已经制作了一个程序,它将加载.Doc Word文件,然后使用if语句搜索相关信息。

现在我的问题是我的解决方案是这个程序需要永远!我说的是30分钟 - 1月完成以下代码。

如何让我的小程序变得不那么笨重?我希望这方面的解决方案能够大大提高我的知识,所以提前感谢大家!

问候 克里斯

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace WindowsFormsApplication3
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }
        public int id = 0;
        public int[] iD = new int[100];
        public string[] timeOn = new string[100];
        public string[] timeOff = new string[100];
        public string[] dutyNo = new string[100];
        public string[] day = new string[100];

        private void button1_Click(object sender, EventArgs e)
        {



            Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document document = application.Documents.Open("c:\\Users\\Alien\\Desktop\\TESTJOBS.doc");
            //the following for will loop for all words

            int count = document.Words.Count;
            for (int i = 1; i <= count; i++)
            {
                // the following if statement will look for the first word that is On
                // this is then (on the file) proceded by  04:00 (thus i+2/3/4 respectively)
                if (document.Words[i].Text == "On")
                {
                    iD[id] = id;
                   // Console.WriteLine("ID Number ={0}", iD[id]);
                    dutyNo[id] = document.Words[i - 14].Text;
                   // Console.WriteLine("duty No set to:{0}", dutyNo[id]);
                    timeOn[id] = document.Words[i + 2].Text + document.Words[i + 3].Text + document.Words[i + 4].Text;
                   // Console.WriteLine("on time set to:{0}", timeOn[id]);
                    // the following if (runs if the last word was not "On" and then searches for the word "Off" which procedes "On" in the file format)
                    // this is then (on the file) proceded by  04:00 (thus i+2/3/4 respectively)
                }
                else if (document.Words[i].Text == "Off")
                {
                    timeOff[id] = document.Words[i + 2].Text + document.Words[i + 3].Text + document.Words[i + 4].Text;
                    //Console.WriteLine("off time set to:{0}", timeOff[id]);
                    // the following if (runs if the last word was not "Off" and then searches for the word "Duty" which procedes "Off" in the file format)
                    // this is then (on the file) proceded by  04:00 (thus i+2/3/4 respectively)
                }
                else if (document.Words[i].Text == "Days" && !(document.Words[i + 3].Text == "Type"))
                {

                    day[id] = document.Words[i + 2].Text;
                    //Console.WriteLine("day set to:{0}", day[id]);
                    //we then print the whole new duty out to ListBox1
                    listBox1.Items.Add(string.Format("new duty ID:{0} Time on:{1} Time off:{2} Duty No:{3} Day:{4}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id]));
                    id++;
                }


            }

            for (int i = 1; i <= 99; i++)
            {
                Console.WriteLine("new duty ID:{0} Time on:{1} Time off:{2} Duty No:{3} Day:{4}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id]);
            }


        }
    }
}

4 个答案:

答案 0 :(得分:3)

Office Interop是fairly slow

Openxml可能是faster,但文件是.doc,所以它可能无法处理它。

但就像在this question中使用Excel一样,您可以通过某种方式提高效果 - 不要通过索引访问Range中的每个单词,因为它会导致AFAIK创建一个包含在Range中的单独RCW实例,这是应用程序中性能瓶颈的主要候选者。

这意味着,改善效果的最佳选择是加载所有单词(.Text)到实际的String s 之前的一些可索引集合中处理,然后才使用该集合创建输出。

如何以最快的方式完成?我不太确定,但您可以尝试从_Document.Words enumerator获取所有单词(虽然它可能会或可能不会更高效,但至少您将能够看到需要多长时间只需检索所需的单词):

var words = document
    .Cast<Range>()
    .Select(r => 
        r.Text)
    .ToList();

或者您可以尝试使用_Document.Content范围Text,但您必须自己将单个单词分开。

答案 1 :(得分:1)

确定已完成,所以我们现在处理所有信息,然后仍然导入整个文档。总运行时间为02:09.8,共2780个句子,约44,000个单词(包括空格!)以下是我的(不完美的代码),不错,考虑到我在2周前拿起C#;)希望这可以帮助将来的人。

    public Form1()
    {
        InitializeComponent();
    }
    public int id = 0;
    public int[] iD = new int[400];
    public string[] timeOn = new string[400];
    public string[] timeOff = new string[400];
    public string[] dutyNo = new string[400];
    public string[] day = new string[400];
    public string[] hours = new string[400];

    //Create File Location Var
    public string fileLocation = null;

    // On Click of Add Dutys
    private void button1_Click(object sender, EventArgs e)
    {
        //Sets Progress Bar visible and prepares to increment
        pBar1.Visible = true;
        pBar1.Minimum = 1;
        pBar1.Value = 1;
        pBar1.Step = 1;


        //Stopwatch test Declared
        Stopwatch stopWatch = new Stopwatch();

        try {
            //Self Test to see if a File Location has been set for Duty document.
            if (fileLocation == null) {
                //If not set prompts user with message box and brings up file explorer
                MessageBox.Show("It Appears that a file location has not yet been set, Please Select one now.");
                Stream myStream = null;
                OpenFileDialog openFileDialog1 = new OpenFileDialog();
                //Sets default Location and Default File type as .doc
                openFileDialog1.InitialDirectory = "c:\\";
                openFileDialog1.Filter = "All files (*.*)|*.*|Word Files (*.doc)|*.doc";
                openFileDialog1.FilterIndex = 2;
                openFileDialog1.RestoreDirectory = true;
                //Waits for User to Click ok in File explorer and then Sets file location to var
                if (openFileDialog1.ShowDialog() == DialogResult.OK)
                {
                    try
                    {
                        //Checks to make sure a file location is set
                        if ((myStream = openFileDialog1.OpenFile()) != null)
                        {
                            using (myStream)
                            {
                                //This is where we set location to var
                                fileLocation = openFileDialog1.FileName;
                            }
                            //Prompts user to click a file before OK
                        }else { MessageBox.Show("Please Select a file location before clicking ok"); }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Error: Could not read file from disk: " + ex.Message);
                    }
                }
            }

           //Loads New Duty file 
            Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document document = application.Documents.Open(fileLocation);
            //Begin stop watch (COPY TIME)
            stopWatch.Start();

            //Sets Count to No of sentences and then prepares Array using Number of sentences 
            //**This process reduces amount of processng time by taking everything in to the program to start and then dealing with it.
            int count = document.Sentences.Count;
            string[] sents = new string[count];
            //Then sets the Progress bar to the Number of sentences that will be Copied to our array
            pBar1.Maximum = count;

            try {
                //For loop runs throug every sentence and adds it to the array.
                for (int i = 0; i < count; i++) {
                    sents[i] = document.Sentences[i+1].Text;
                    //increment Progress bar by 1 for every sentence(Parse made)
                    pBar1.PerformStep();
                }
                //Closes our instance of word
                application.Quit();
                try {

                    for (int i = 0; i < count; i++)
                    {
                        //Sets our Split criteria 
                        char[] delimiterChars = { ' ','\t' };
                        string[] test = (sents[i].Split(delimiterChars));
                        //we then enter For loop that runs for the number of ords found/Split
                        for (int a = 0; a < test.Length; a++)
                        {  
                            //If tests only begin if the word is NOT a space blank, tab , - As these do parse through into our Test arrays
                            if (!(test[a] == "" || test[a].Contains("/t")|| test[a].Contains("-") || test[a].Contains(" ")))
                            {
                                //If tests to find Duty numbers ours on off and assigns ID number for easy indexing. 
                                //##THIS DOES ASSUME THAT OUR TIMES ARE 1 SPACE AFTER THEIR IDENTIFIERS.
                                if (test[a] == "TG")
                                {
                                    dutyNo[id] = test[a + 2]; 
                                }
                                else if (test[a] == "On")
                                {
                                    iD[id] = id;
                                    timeOn[id] = test[a + 1];
                                }
                                else if (test[a] == "Off")
                                {
                                    timeOff[id] = test[a + 1];
                                }
                                else if (test[a] == "Hrs")
                                {
                                    hours[id] = test[a + 1];
                                }
                                else if (test[a] == "Days")
                                {
                                    day[id] = test[a + 1];
                                    //PRINTS TO USER VIA LIST BOX ALL THE DUTYS ADDED.
                                    listBox1.Items.Add(string.Format("ADDED:Duty No:{3} Time on:{1} Time off:{2} Hours{5} Day:{4} ID:{0}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id], hours[id]));
                                    id++;
                                }

                            }
                        }
                    }
                }
                catch(Exception ex) { MessageBox.Show("Error in split:" + ex.Message); }
            }
            catch(Exception ex) { MessageBox.Show("error setting string to Document:" + ex.Message); }
            //Stopwatch Is then printed for testing purposes.
            TimeSpan ts = stopWatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds,
            ts.Milliseconds / 10);
            Console.WriteLine("RunTime (total):" + elapsedTime);

            stopWatch.Reset();

        }
        catch(Exception ex) { MessageBox.Show("Error in reading/finding file: "+ ex.Message); }

    }


}

}

我将所有这些代码用于一个相当大的列表框(ListBox1),一个Button(Button1)和一个不可见的启动进度条(pBar1)。

答案 2 :(得分:0)

而不是使用:

document.Words[i].Text

多次,做:

String Text = document.Words[i].Text;
在for循环的顶部

并使用&#34; Text&#34; (或者你想称之为的任何东西)。尤金·波德斯卡尔的建议看起来非常有用,但这种简单的改进(我在看到尤金的回应之前就已经考虑过了)很容易做到,并且可以做出实质性的改进。

答案 3 :(得分:0)

您可以使用OpenXml加载整个.Content范围并对其进行处理,然后重新导入