我有几个Word文档,每个文档包含几百页的科学数据,其中包括:
问题是,以Word的形式存储这些数据对我们来说效率不高。所以我们希望将所有这些信息存储在数据库(MySQL)中。我们想将这些格式转换为LaTex。
有没有办法使用VBA遍历文档中所有的下标、上标和公式?
如何迭代数学方程?
答案 0 :(得分:10)
根据迈克尔的comment回答
不!我只想把下标中的内容替换为 _{下标内容},类似地把上标中的内容替换为 ^{上标内容}。这就是TeX中的等价写法。然后,我只需把所有内容复制到文本文件中,这会删除格式但保留这些字符。问题就解决了。但为此,我需要访问文档中的下标和上标对象。
Sub sampler()
    'Wraps every superscript run of the active document in ^{...} and every
    'subscript run in _{...}, so the TeX markup survives when the document is
    'later copied or saved as plain text.
    'In the replacement text, "^^" is a literal caret and "^&" is the matched text.
    Selection.HomeKey wdStory
    With Selection.Find
        'Selection.Find keeps the settings of the previous search, so reset
        'everything this macro depends on before the first pass.
        .ClearFormatting
        .Replacement.ClearFormatting
        .Text = ""                  'format-only search: match any text
        .Forward = True
        .Wrap = wdFindContinue      'cover the whole story regardless of cursor position
        .MatchWildcards = False

        'Pass 1: superscript runs
        .Font.Superscript = True
        .Format = True
        .Replacement.Text = "^^{^&}"
        .Execute Replace:=wdReplaceAll

        'Pass 2: subscript runs. Clear the criteria of pass 1 first so the two
        'passes cannot interfere with each other.
        .ClearFormatting
        .Font.Subscript = True
        .Format = True
        .Replacement.Text = "_{^&}"
        .Execute Replace:=wdReplaceAll
    End With
End Sub
**修改**
或者,如果您还想将OMaths转换为TeX / LaTeX,请执行以下操作:
对于一个完全不同的想法,请访问David Carlisle's blog,您可能感兴趣。
**更新**
**The module(模块代码)**
Option Explicit
'This module requires the following references (VBA editor: Tools > References):
'Microsoft Scripting Runtime  - provides FileSystemObject, Dictionary and TextStream
'Microsoft XML, v6.0          - provides DOMDocument60
'Shared file-system helper; "As New" creates the object on first use.
Private fso As New Scripting.FileSystemObject
'Full paths of the two XSLT stylesheets (OMML -> MathML and MathML -> TeX).
'Both are assigned at the start of ProcessFile.
Private omml2mml$, mml2Tex$
Public Function ProcessFile(fpath$) As Boolean
    'Converts the Word document at fpath into a LaTeX file stored next to it
    '(same folder and base name, extension .tex).
    '
    'Steps:
    '  1. wrap italic / bold / small-caps / superscript / subscript runs in TeX markup
    '  2. replace every OMath equation by a unique placeholder and remember its TeX
    '     code (OMML -> MathML -> TeX through two XSLT transformations)
    '  3. save the document as plain text, put the TeX code back in place of the
    '     placeholders and wrap the result in a minimal LaTeX preamble
    '
    'Returns True when the .tex file has been written. Errors are not trapped
    'here and propagate to the caller.

    'convPath set to my system at (may vary on your system):
    omml2mml = "c:\program files\microsoft office\office14\omml2mml.xsl"
    'download: http://prdownloads.sourceforge.net/xsltml/xsltml_2.0.zip
    'unzip at «c:\xsltml_2.0»
    mml2Tex = "c:\xsltml_2.0\mmltex.xsl"

    'Keep an explicit reference instead of relying on ActiveDocument throughout.
    Dim doc As Document
    Set doc = Documents.Open(fpath)
    doc.Activate

    'to make sure no paragraph should contain any emphasis
    '(otherwise a wrapped run could swallow a paragraph mark)
    Selection.HomeKey wdStory
    With Selection.Find
        .ClearFormatting
        .Replacement.ClearFormatting
        .Forward = True
        .Wrap = wdFindContinue
        .MatchWildcards = False
        .Text = "^p"
        .Replacement.Text = "^&"
        .Replacement.Font.Italic = False
        .Replacement.Font.Bold = False
        .Replacement.Font.Superscript = False
        .Replacement.Font.Subscript = False
        .Replacement.Font.SmallCaps = False
        .Execute Replace:=wdReplaceAll
    End With

    'Emphasis, then Superscript + Subscript. Each pass starts from clean
    'search criteria (see WrapRunsWithFormat).
    WrapRunsWithFormat "italic", "\textit{^&}"
    WrapRunsWithFormat "bold", "\textbf{^&}"
    WrapRunsWithFormat "smallcaps", "\textsc{^&}"
    WrapRunsWithFormat "superscript", "^^{^&}"
    WrapRunsWithFormat "subscript", "_{^&}"

    'Equations: swap each OMath for a placeholder "<file_n>" and keep its TeX code.
    Dim dict As New Scripting.Dictionary
    Dim om As OMath, key$, idx&
    key = Replace(LCase(Dir(fpath)), " ", "_omath_")
    'Walk the collection backwards by index: the loop body edits the document,
    'and changing a collection while For Each enumerates it can skip items.
    For idx = doc.OMaths.Count To 1 Step -1
        DoEvents
        Dim tKey$, texCode$
        Set om = doc.OMaths(idx)
        tKey = "<" & key & "_" & idx & ">"
        texCode = TransformString(TransformString(CStr(om.Range.WordOpenXML), omml2mml), mml2Tex)
        om.Range.Select
        Selection.Delete
        Selection.Text = tKey
        dict.Add tKey, texCode
    Next idx

    Dim latexDoc$, oPath$
    latexDoc = "\documentclass[10pt]{article}" & vbCrLf & _
               "\usepackage[utf8]{inputenc} % set input encoding" & vbCrLf & _
               "\usepackage{amsmath,amssymb}" & vbCrLf & _
               "\begin{document}" & vbCrLf & _
               "###" & vbCrLf & _
               "\end{document}"

    'Same folder and base name as the input, extension .tex. This also works
    'when fpath has no extension.
    oPath = fso.BuildPath(fso.GetParentFolderName(fpath), fso.GetBaseName(fpath) & ".tex")

    'The plain-text copy must exist before it can be read back below.
    '65001 = UTF-8, matching the utf8 inputenc option of the preamble.
    doc.SaveAs FileName:=oPath, FileFormat:=wdFormatText, Encoding:=65001
    'Never prompt, and never write the placeholders back into the Word file.
    doc.Close SaveChanges:=wdDoNotSaveChanges

    'NOTE(review): FileSystemObject reads and writes this file with the system
    'ANSI code page. ASCII content is unaffected; confirm that non-ASCII
    'characters round-trip correctly on the target system.
    Dim src As TextStream, c$
    Set src = fso.OpenTextFile(oPath, ForReading)
    If Not src.AtEndOfStream Then c = src.ReadAll()   'ReadAll raises an error on an empty file
    src.Close

    'Put the TeX code of every equation back in place of its placeholder.
    Dim k
    For Each k In dict.Keys
        c = Replace(c, CStr(k), dict.Item(k), 1, 1, vbTextCompare)
    Next k

    latexDoc = Replace(latexDoc, "###", c)

    Dim ost As TextStream
    Set ost = fso.CreateTextFile(oPath, True)   'True: overwrite the plain-text copy saved above
    ost.Write latexDoc
    ost.Close                                   'flush and release the file

    ProcessFile = True
End Function

Private Sub WrapRunsWithFormat(ByVal fontAttr As String, ByVal wrapper As String)
    'Replaces every run of the active document carrying the given font attribute
    '("italic", "bold", "smallcaps", "superscript" or "subscript") by wrapper,
    'in which "^&" stands for the matched text.
    Selection.HomeKey wdStory
    With Selection.Find
        'Start from clean criteria: text and formatting of an earlier search
        'would otherwise be combined with this one.
        .ClearFormatting
        'No replacement formatting, so a wrapped run is not stripped of
        'attributes (e.g. superscript) that a later pass still has to find.
        .Replacement.ClearFormatting
        .Text = ""                  'format-only search
        .Forward = True
        .Wrap = wdFindContinue
        .MatchWildcards = False
        Select Case fontAttr
            Case "italic": .Font.Italic = True
            Case "bold": .Font.Bold = True
            Case "smallcaps": .Font.SmallCaps = True
            Case "superscript": .Font.Superscript = True
            Case "subscript": .Font.Subscript = True
        End Select
        .Format = True
        .Replacement.Text = wrapper
        .Execute Replace:=wdReplaceAll
    End With
End Sub
Private Function CreateDOM()
    'Returns a fresh MSXML 6.0 DOM document set up for synchronous loading,
    'with validation on parse and resolution of externals switched off.
    Dim parser As DOMDocument60
    Set parser = New DOMDocument60
    parser.async = False
    parser.validateOnParse = False
    parser.resolveExternals = False
    Set CreateDOM = parser
End Function
Private Function TransformString(xmlString$, xslPath$) As String
    'Applies the XSLT stylesheet stored at xslPath to the XML text in xmlString
    'and returns the transformation result as a string.
    Dim sourceDoc, sheetDoc, transformed
    Set sourceDoc = CreateDOM
    Set sheetDoc = CreateDOM
    sourceDoc.LoadXML xmlString     'XML passed in memory
    sheetDoc.Load xslPath           'stylesheet read from disk
    transformed = sourceDoc.transformNode(sheetDoc)
    TransformString = transformed
End Function
**调用方式(在立即窗口中执行):**
?ProcessFile("c:\test.doc")
结果将在 c:\ 中创建为 test.tex。
模块可能需要修复某些地方。如果是这样,请告诉我。
答案 1 :(得分:2)
Word中的Document对象有一个oMaths集合,它表示文档中的所有oMath对象。 oMath对象包含Functions方法,该方法将返回oMath对象中的函数集合。因此,方程式不应该是一个大问题。
我想你想要捕获的不仅仅是下标和上标,你想要包含那些子和上标的整个等式。这可能更具挑战性,因为您必须定义起点和终点。如果您要使用.Find方法查找下标,然后选择它之前的第一个空格字符和它之后的第一个空格字符之间的所有内容,那可能会有效,但前提是您的等式不包含空格。
答案 2 :(得分:1)
此VBA子过程会遍历文档中各个文本部分(story)的全部文本,在插入LaTeX记号的同时去除上标和下标格式。
Public Sub LatexConversion()
    'Converts superscript and subscript formatting in every story of the active
    'document (main text, headers, footers, footnotes, text boxes, ...) into
    'LaTeX notation and removes the formatting itself.
    '
    'Each run is wrapped as a whole, ^{...} or _{...}. Emitting one "^" per
    'character would turn a two-character superscript into x^a^b, which LaTeX
    'rejects as a double superscript.
    Dim story As Word.Range
    For Each story In ActiveDocument.StoryRanges
        Do
            WrapScriptRuns story, True      'superscript
            WrapScriptRuns story, False     'subscript
            'Linked stories of the same type (e.g. further headers or text boxes)
            Set story = story.NextStoryRange
        Loop Until story Is Nothing
    Next
End Sub

Private Sub WrapScriptRuns(ByVal target As Word.Range, ByVal isSuperscript As Boolean)
    'Wraps every superscript (or subscript) run inside target in ^{...} (or _{...})
    'and clears that formatting on the replaced text.
    'Works on a duplicate so the caller's range object is left as it was.
    With target.Duplicate.Find
        .ClearFormatting
        .Replacement.ClearFormatting
        .Text = ""                  'format-only search
        .Forward = True
        .Wrap = wdFindStop          'stay inside this story
        .MatchWildcards = False
        If isSuperscript Then
            .Font.Superscript = True
            .Replacement.Font.Superscript = False
            .Replacement.Text = "^^{^&}"    '"^^" = literal caret, "^&" = matched text
        Else
            .Font.Subscript = True
            .Replacement.Font.Subscript = False
            .Replacement.Text = "_{^&}"
        End If
        .Format = True
        .Execute Replace:=wdReplaceAll
    End With
End Sub
如果某些方程式是使用Word内置的公式编辑器或通过构建块(Word 2010/2007)创建的,并且位于内容控件中,则上述方法将无效。在执行上述代码之前,这些方程式要么需要单独的VBA转换代码,要么需要手动转换为纯文本方程式。
答案 3 :(得分:1)
使用Open XML SDK将Office Math(OMath)转换为LaTeX的C#实现。请从这里下载MMLTEX XSL文件(xsltml项目):http://sourceforge.net/projects/xsltml/
/// <summary>
/// Extracts an Office Math paragraph from test.docx and converts it to LaTeX
/// in two XSLT steps: OMML to MathML (OMML2MML.XSL, shipped with Microsoft
/// Office) and MathML to LaTeX (mmltex.xsl from the xsltml project).
/// The intermediate and final results are written to D:\openmath.xml,
/// d:\MATHML.xml and d:\Latex.txt; MathML and LaTeX are also printed to the console.
/// </summary>
/// <remarks>
/// Only the first math paragraph of each Word paragraph is considered, and when
/// several Word paragraphs contain math the last one is the one converted.
/// If the document contains no math paragraph, a message is written to the
/// error stream and nothing is converted.
/// </remarks>
public void OMathTolaTeX()
{
    string OMath = "";

    XslCompiledTransform xslTransform = new XslCompiledTransform();
    // The OMML2MML.XSL file is located under
    // %ProgramFiles%\Microsoft Office\Office12\
    // Copy to Local folder
    xslTransform.Load(@"D:\OMML2MML.XSL");

    // The document is only read, so open it read-only.
    using (WordprocessingDocument wordDoc =
           WordprocessingDocument.Open("test.docx", false))
    {
        OpenXmlElement doc = wordDoc.MainDocumentPart.Document.Body;
        foreach (var par in doc.Descendants<Paragraph>())
        {
            var math = par.Descendants<DocumentFormat.OpenXml.Math.Paragraph>().FirstOrDefault();
            if (math == null)
            {
                // Ordinary paragraph without an equation.
                continue;
            }
            File.WriteAllText("D:\\openmath.xml", math.OuterXml);
            OMath = math.OuterXml;
        }
    }

    if (string.IsNullOrEmpty(OMath))
    {
        // Nothing to transform; an empty input would only make the XML reader throw.
        Console.Error.WriteLine("No Office Math paragraph found in test.docx.");
        return;
    }

    // Step 1: Office Math ML -> MathML
    string MathML = ApplyXslt(xslTransform, OMath);
    Console.Out.WriteLine(MathML);
    File.WriteAllText("d:\\MATHML.xml", MathML);

    // Step 2: MathML -> LaTeX
    var xmlResolver = new XmlUrlResolver();
    xslTransform = new XslCompiledTransform();
    // Enable the document() function and script blocks for this stylesheet;
    // the resolver lets it load the files it references.
    XsltSettings xsltt = new XsltSettings(true, true);
    // The mmltex.xsl file is to convert to Tex
    xslTransform.Load("mmltex.xsl", xsltt, xmlResolver);

    string LaTex = ApplyXslt(xslTransform, MathML);
    Console.Out.WriteLine(LaTex);
    File.WriteAllText("d:\\Latex.txt", LaTex);
}

/// <summary>
/// Runs <paramref name="transform"/> over the XML text in <paramref name="xml"/>
/// and returns the output as a string, without an XML declaration.
/// </summary>
private static string ApplyXslt(XslCompiledTransform transform, string xml)
{
    // Start from the stylesheet's own output settings (xml or text method).
    XmlWriterSettings settings = transform.OutputSettings.Clone();
    // Configure xml writer to omit xml declaration.
    settings.ConformanceLevel = ConformanceLevel.Fragment;
    settings.OmitXmlDeclaration = true;

    using (XmlReader reader = XmlReader.Create(new StringReader(xml)))
    using (StringWriter output = new StringWriter())
    {
        // Disposing the writer flushes it. Reading the target before that
        // can return truncated or empty output.
        using (XmlWriter writer = XmlWriter.Create(output, settings))
        {
            transform.Transform(reader, writer);
        }
        return output.ToString();
    }
}
希望这有助于C#开发人员。