我是C#的新手,写了一个相当笨重的代码。我一直在网上做很多课程,很多人说有几种方法可以解决问题。现在我已经制作了一个程序,它将加载.Doc Word文件,然后使用if语句搜索相关信息。
现在我的问题是我的解决方案是这个程序需要永远!我说的是30分钟 - 1月完成以下代码。
如何让我的小程序变得不那么笨重?我希望这方面的解决方案能够大大提高我的知识,所以提前感谢大家!
问候 克里斯
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace WindowsFormsApplication3
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
public int id = 0;
public int[] iD = new int[100];
public string[] timeOn = new string[100];
public string[] timeOff = new string[100];
public string[] dutyNo = new string[100];
public string[] day = new string[100];
private void button1_Click(object sender, EventArgs e)
{
Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
Microsoft.Office.Interop.Word.Document document = application.Documents.Open("c:\\Users\\Alien\\Desktop\\TESTJOBS.doc");
//the following for will loop for all words
int count = document.Words.Count;
for (int i = 1; i <= count; i++)
{
// the following if statement will look for the first word that is On
// this is then (on the file) proceded by 04:00 (thus i+2/3/4 respectively)
if (document.Words[i].Text == "On")
{
iD[id] = id;
// Console.WriteLine("ID Number ={0}", iD[id]);
dutyNo[id] = document.Words[i - 14].Text;
// Console.WriteLine("duty No set to:{0}", dutyNo[id]);
timeOn[id] = document.Words[i + 2].Text + document.Words[i + 3].Text + document.Words[i + 4].Text;
// Console.WriteLine("on time set to:{0}", timeOn[id]);
// the following if (runs if the last word was not "On" and then searches for the word "Off" which procedes "On" in the file format)
// this is then (on the file) proceded by 04:00 (thus i+2/3/4 respectively)
}
else if (document.Words[i].Text == "Off")
{
timeOff[id] = document.Words[i + 2].Text + document.Words[i + 3].Text + document.Words[i + 4].Text;
//Console.WriteLine("off time set to:{0}", timeOff[id]);
// the following if (runs if the last word was not "Off" and then searches for the word "Duty" which procedes "Off" in the file format)
// this is then (on the file) proceded by 04:00 (thus i+2/3/4 respectively)
}
else if (document.Words[i].Text == "Days" && !(document.Words[i + 3].Text == "Type"))
{
day[id] = document.Words[i + 2].Text;
//Console.WriteLine("day set to:{0}", day[id]);
//we then print the whole new duty out to ListBox1
listBox1.Items.Add(string.Format("new duty ID:{0} Time on:{1} Time off:{2} Duty No:{3} Day:{4}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id]));
id++;
}
}
for (int i = 1; i <= 99; i++)
{
Console.WriteLine("new duty ID:{0} Time on:{1} Time off:{2} Duty No:{3} Day:{4}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id]);
}
}
}
}
答案 0 :(得分:3)
Office Interop是fairly slow。
Openxml可能是faster,但文件是.doc,所以它可能无法处理它。
但就像在this question中使用Excel一样,您可以通过某种方式提高效果 - 不要通过索引访问Range
中的每个单词,因为它会导致AFAIK创建一个包含在Range
中的单独RCW实例,这是应用程序中性能瓶颈的主要候选者。
这意味着,改善效果的最佳选择是加载所有单词(.Text
)到实际的String
s 之前的一些可索引集合中处理,然后才使用该集合创建输出。
如何以最快的方式完成?我不太确定,但您可以尝试从_Document.Words
enumerator获取所有单词(虽然它可能会或可能不会更高效,但至少您将能够看到需要多长时间只需检索所需的单词):
var words = document
.Cast<Range>()
.Select(r =>
r.Text)
.ToList();
或者您可以尝试使用_Document.Content
范围Text
,但您必须自己将单个单词分开。
答案 1 :(得分:1)
确定已完成,所以我们现在处理所有信息,然后仍然导入整个文档。总运行时间为02:09.8,共2780个句子,约44,000个单词(包括空格!)以下是我的(不完美的代码),不错,考虑到我在2周前拿起C#;)希望这可以帮助将来的人。
public Form1()
{
InitializeComponent();
}
public int id = 0;
public int[] iD = new int[400];
public string[] timeOn = new string[400];
public string[] timeOff = new string[400];
public string[] dutyNo = new string[400];
public string[] day = new string[400];
public string[] hours = new string[400];
//Create File Location Var
public string fileLocation = null;
// On Click of Add Dutys
private void button1_Click(object sender, EventArgs e)
{
//Sets Progress Bar visible and prepares to increment
pBar1.Visible = true;
pBar1.Minimum = 1;
pBar1.Value = 1;
pBar1.Step = 1;
//Stopwatch test Declared
Stopwatch stopWatch = new Stopwatch();
try {
//Self Test to see if a File Location has been set for Duty document.
if (fileLocation == null) {
//If not set prompts user with message box and brings up file explorer
MessageBox.Show("It Appears that a file location has not yet been set, Please Select one now.");
Stream myStream = null;
OpenFileDialog openFileDialog1 = new OpenFileDialog();
//Sets default Location and Default File type as .doc
openFileDialog1.InitialDirectory = "c:\\";
openFileDialog1.Filter = "All files (*.*)|*.*|Word Files (*.doc)|*.doc";
openFileDialog1.FilterIndex = 2;
openFileDialog1.RestoreDirectory = true;
//Waits for User to Click ok in File explorer and then Sets file location to var
if (openFileDialog1.ShowDialog() == DialogResult.OK)
{
try
{
//Checks to make sure a file location is set
if ((myStream = openFileDialog1.OpenFile()) != null)
{
using (myStream)
{
//This is where we set location to var
fileLocation = openFileDialog1.FileName;
}
//Prompts user to click a file before OK
}else { MessageBox.Show("Please Select a file location before clicking ok"); }
}
catch (Exception ex)
{
MessageBox.Show("Error: Could not read file from disk: " + ex.Message);
}
}
}
//Loads New Duty file
Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
Microsoft.Office.Interop.Word.Document document = application.Documents.Open(fileLocation);
//Begin stop watch (COPY TIME)
stopWatch.Start();
//Sets Count to No of sentences and then prepares Array using Number of sentences
//**This process reduces amount of processng time by taking everything in to the program to start and then dealing with it.
int count = document.Sentences.Count;
string[] sents = new string[count];
//Then sets the Progress bar to the Number of sentences that will be Copied to our array
pBar1.Maximum = count;
try {
//For loop runs throug every sentence and adds it to the array.
for (int i = 0; i < count; i++) {
sents[i] = document.Sentences[i+1].Text;
//increment Progress bar by 1 for every sentence(Parse made)
pBar1.PerformStep();
}
//Closes our instance of word
application.Quit();
try {
for (int i = 0; i < count; i++)
{
//Sets our Split criteria
char[] delimiterChars = { ' ','\t' };
string[] test = (sents[i].Split(delimiterChars));
//we then enter For loop that runs for the number of ords found/Split
for (int a = 0; a < test.Length; a++)
{
//If tests only begin if the word is NOT a space blank, tab , - As these do parse through into our Test arrays
if (!(test[a] == "" || test[a].Contains("/t")|| test[a].Contains("-") || test[a].Contains(" ")))
{
//If tests to find Duty numbers ours on off and assigns ID number for easy indexing.
//##THIS DOES ASSUME THAT OUR TIMES ARE 1 SPACE AFTER THEIR IDENTIFIERS.
if (test[a] == "TG")
{
dutyNo[id] = test[a + 2];
}
else if (test[a] == "On")
{
iD[id] = id;
timeOn[id] = test[a + 1];
}
else if (test[a] == "Off")
{
timeOff[id] = test[a + 1];
}
else if (test[a] == "Hrs")
{
hours[id] = test[a + 1];
}
else if (test[a] == "Days")
{
day[id] = test[a + 1];
//PRINTS TO USER VIA LIST BOX ALL THE DUTYS ADDED.
listBox1.Items.Add(string.Format("ADDED:Duty No:{3} Time on:{1} Time off:{2} Hours{5} Day:{4} ID:{0}", iD[id], timeOn[id], timeOff[id], dutyNo[id], day[id], hours[id]));
id++;
}
}
}
}
}
catch(Exception ex) { MessageBox.Show("Error in split:" + ex.Message); }
}
catch(Exception ex) { MessageBox.Show("error setting string to Document:" + ex.Message); }
//Stopwatch Is then printed for testing purposes.
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine("RunTime (total):" + elapsedTime);
stopWatch.Reset();
}
catch(Exception ex) { MessageBox.Show("Error in reading/finding file: "+ ex.Message); }
}
}
}
我将所有这些代码用于一个相当大的列表框(ListBox1),一个Button(Button1)和一个不可见的启动进度条(pBar1)。
答案 2 :(得分:0)
而不是使用:
document.Words[i].Text
多次,做:
String Text = document.Words[i].Text;
在for循环的顶部并使用&#34; Text&#34; (或者你想称之为的任何东西)。尤金·波德斯卡尔的建议看起来非常有用,但这种简单的改进(我在看到尤金的回应之前就已经考虑过了)很容易做到,并且可以做出实质性的改进。
答案 3 :(得分:0)
您可以使用OpenXml加载整个.Content范围并对其进行处理,然后重新导入