我见过使用 Word 9.0 对象库的示例,但我使用的是 VS2010、Office 2010 Beta 和 .NET 4.0。关于如何使用新版 Word DLL,有什么建议吗?
我只是想在 .NET 3.5 或更高版本中实现 RTF 转纯文本(TEXT)的功能。
答案 0 :(得分:10)
我使用TextRange获得了更好的WPF解决方案。
// Precondition: the file stream has already been read into a byte array named 'data'.
FlowDocument document = new FlowDocument();
// A TextRange spanning the entire document, from its content start to its content end.
TextRange txtRange = new TextRange(document.ContentStart, document.ContentEnd);
using (MemoryStream stream = new MemoryStream(data))
{
    // Load the bytes into the range, interpreting them as RTF.
    txtRange.Load(stream, DataFormats.Rtf);
}
现在您可以在 txtRange.Text 中看到提取的文本。
答案 1 :(得分:5)
你真的需要把 .RTF 文件加载到 Word 中吗? .NET 提供了可以处理 .RTF 文件的 RichTextBox 控件。请参阅此处:http://msdn.microsoft.com/en-us/library/1z7hy77a.aspx(如何:将文件加载到 Windows 窗体 RichTextBox 控件中)
答案 2 :(得分:1)
/// <summary>
/// Document kinds used to select a reader implementation.
/// </summary>
public enum eFileType
{
    /// <summary>No recognized document kind.</summary>
    Invalid = -1,

    /// <summary>Plain-text document.</summary>
    TextDocument = 0,

    /// <summary>Rich Text Format document.</summary>
    RichTextDocument = 1,

    /// <summary>Word document.</summary>
    WordDocument = 2
}
/// <summary>
/// Contract for a reader that takes a file and returns a string for it.
/// </summary>
public interface IRead
{
    /// <summary>
    /// Reads the given file and returns its content as a string.
    /// </summary>
    /// <param name="file">The file to read; the implementations in this file treat it as a path.</param>
    /// <returns>
    /// The content as a string. The implementations in this file return null when reading fails.
    /// </returns>
    string Read(string file);
}
/// <summary>
/// Helpers for classifying files by their extension.
/// </summary>
public static class FileManager
{
    /// <summary>
    /// Maps a file extension (including the leading dot) to a document kind.
    /// </summary>
    /// <param name="extension">
    /// The extension to classify, e.g. ".rtf". Matching ignores case, so ".RTF"
    /// and ".Rtf" are treated the same as ".rtf". May be null.
    /// </param>
    /// <returns>
    /// <see cref="eFileType.TextDocument"/> for ".txt",
    /// <see cref="eFileType.RichTextDocument"/> for ".rtf",
    /// <see cref="eFileType.WordDocument"/> for ".docx",
    /// and <see cref="eFileType.Invalid"/> for anything else (including null).
    /// </returns>
    public static eFileType GetFileType(string extension)
    {
        // File extensions are not case-sensitive on Windows, and an extension taken
        // from a file name keeps whatever casing the name had, so compare with
        // OrdinalIgnoreCase. The static string.Equals is null-safe.
        if (string.Equals(extension, ".txt", System.StringComparison.OrdinalIgnoreCase))
        {
            return eFileType.TextDocument;
        }

        if (string.Equals(extension, ".rtf", System.StringComparison.OrdinalIgnoreCase))
        {
            return eFileType.RichTextDocument;
        }

        if (string.Equals(extension, ".docx", System.StringComparison.OrdinalIgnoreCase))
        {
            return eFileType.WordDocument;
        }

        return eFileType.Invalid;
    }
}
/// <summary>
/// Reads a plain-text file.
/// </summary>
public class TextDocument : IRead
{
    /// <summary>
    /// Reads the whole file at the given path as text.
    /// </summary>
    /// <param name="file">Path of the file to read.</param>
    /// <returns>The complete text of the file, or null if the file could not be read.</returns>
    public string Read(string file)
    {
        try
        {
            // 'using' guarantees the reader (and its file handle) is released even
            // when ReadToEnd throws part-way through.
            using (var reader = new StreamReader(file))
            {
                return reader.ReadToEnd();
            }
        }
        catch
        {
            // Best-effort contract kept from the original: any failure is reported
            // to the caller as null rather than as an exception.
            return null;
        }
    }
}
/// <summary>
/// Reads a rich-text file by opening it through Word automation.
/// </summary>
public class RichTextDocument : IRead
{
    /// <summary>
    /// Opens the file in a new Word application instance and returns the text of
    /// the document's content.
    /// </summary>
    /// <param name="file">Path of the file to open.</param>
    /// <returns>The document text, or null if any step (including cleanup) fails.</returns>
    public string Read(string file)
    {
        try
        {
            var wordApp = new Application();
            try
            {
                object path = file;
                object nullobj = System.Reflection.Missing.Value;

                // Only the path is supplied; the other 15 arguments are passed as Missing.
                var doc = wordApp.Documents.Open(
                    ref path,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj);
                try
                {
                    // Read from the document that was just opened rather than from
                    // whatever Word currently considers the active document.
                    return doc.Content.Text;
                }
                finally
                {
                    // Close is both a method and an event name on Document, so the
                    // call goes through the _Document interface.
                    ((_Document)doc).Close();
                }
            }
            finally
            {
                // Shut the Word instance down even on failure; otherwise every call
                // leaves a Word process behind. Same method/event name clash as
                // Close, hence the _Application cast.
                ((_Application)wordApp).Quit();
            }
        }
        catch
        {
            // Best-effort contract kept from the original: any failure yields null.
            return null;
        }
    }
}
/// <summary>
/// Reads a Word document through Word automation.
/// </summary>
public class WordDocument : IRead
{
    /// <summary>
    /// Opens the file in a new Word application instance and returns the text of
    /// the document's content.
    /// </summary>
    /// <param name="file">Path of the file to open.</param>
    /// <returns>The document text, or null if any step (including cleanup) fails.</returns>
    public string Read(string file)
    {
        try
        {
            var wordApp = new Application();
            try
            {
                object path = file;
                object nullobj = System.Reflection.Missing.Value;

                // Only the path is supplied; the other 15 arguments are passed as Missing.
                var doc = wordApp.Documents.Open(
                    ref path,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj);
                try
                {
                    // Read from the document that was just opened rather than from
                    // whatever Word currently considers the active document.
                    return doc.Content.Text;
                }
                finally
                {
                    // Close is both a method and an event name on Document, so the
                    // call goes through the _Document interface.
                    ((_Document)doc).Close();
                }
            }
            finally
            {
                // Shut the Word instance down even on failure; otherwise every call
                // leaves a Word process behind. Same method/event name clash as
                // Close, hence the _Application cast.
                ((_Application)wordApp).Quit();
            }
        }
        catch
        {
            // Best-effort contract kept from the original: any failure yields null.
            return null;
        }
    }
}
/// <summary>
/// Creates the reader that corresponds to a document kind.
/// </summary>
public class Factory
{
    /// <summary>
    /// Returns a new reader for the given document kind.
    /// </summary>
    /// <param name="type">The document kind to create a reader for.</param>
    /// <returns>
    /// A new <see cref="RichTextDocument"/>, <see cref="WordDocument"/> or
    /// <see cref="TextDocument"/>; null for any other value.
    /// </returns>
    public IRead Get(eFileType type)
    {
        switch (type)
        {
            case eFileType.TextDocument:
                return new TextDocument();

            case eFileType.RichTextDocument:
                return new RichTextDocument();

            case eFileType.WordDocument:
                return new WordDocument();

            default:
                // No reader exists for this kind.
                return null;
        }
    }
}
/// <summary>
/// Reads a file by delegating to the reader supplied at construction.
/// </summary>
public class ResumeReader
{
    // The reader every Read call is forwarded to; assigned once in the constructor.
    private readonly IRead _reader;

    /// <summary>
    /// Creates a reader that forwards to <paramref name="read"/>.
    /// </summary>
    /// <param name="read">The reader to delegate to; must not be null.</param>
    /// <exception cref="InvalidDataException">Thrown when <paramref name="read"/> is null.</exception>
    public ResumeReader(IRead read)
    {
        if (read == null)
        {
            throw new InvalidDataException("read cannot be null");
        }

        _reader = read;
    }

    /// <summary>
    /// Reads the given file with the wrapped reader.
    /// </summary>
    /// <param name="file">The file to read, passed through unchanged.</param>
    /// <returns>Whatever the wrapped reader returns for the file.</returns>
    public string Read(string file)
    {
        return _reader.Read(file);
    }
}
编辑以纠正语法突出显示
答案 3 :(得分:0)
如果有人需要ASP.NET的解决方案,我找到了一个完美的解决方案:
添加对 System.Windows.Forms 的引用,或者下载该 DLL 本身(download the DLL itself)并引用它。
接下来,您可以通过创建一个临时的 RichTextBox 来提取文本:
// 'Path' is the path of the RTF file to convert (defined outside this snippet).
string text;
// The RichTextBox is only a temporary converter, so dispose it as soon as the
// plain text has been extracted instead of leaving it to the garbage collector.
using (RichTextBox box = new RichTextBox())
{
    // Assign the raw RTF markup, then read back the text without formatting.
    box.Rtf = File.ReadAllText(Path);
    text = box.Text;
}