我对编程比较陌生,我写了一个Microsoft Word VBA宏,它提取了一个"部件列表"来自专利说明书(活动文件的文本),其中列表中的每个部件参考都以基本方式标识为任何看起来像部件或特征的数字或全部大写字母标识符,前面有最多四个字的任何内容在同一个句子里。
到目前为止我成功完成的工作是自动打开一个新的Word文档并逐行插入所有唯一的零件参考,格式如
" 10:提供缝纫机10,"或者" Q:热通量Q."
我在每行的开头重复标识符,以便标识符在左边距处显示对齐。
我也希望它们按标识符排序,这是我的正则表达式MatchCollection m的m.SubMatches(2)。首先是数字顺序的数字,然后字母顺序的alpha引用会很好。
有关如何解决这个问题的任何建议?这是一个代码片段,它使用简单的冒泡排序算法对整个m.Value进行排序,而无需将数字标识符转换为Long值:
Sub ExtractPartsList()
Dim re As VBScript_RegExp_55.RegExp
Set re = New VBScript_RegExp_55.RegExp
re.pattern = "((?:[A-Z]*[a-z]+[\s\n]+){0,3})(?=[A-Z]*[a-z]+[\s\n]+(?:\d+\b|[A-Z]+\b))" + _
"(\b[A-Z]*[a-z]+[\s\n]+)(\b\d+\b'*|[A-Z]+\b'*)" + _
"((?:\,[\s\n]+(?:\d+|[A-Z]+\b))+(?:\,?[\s\n]+and[\s\n+](?:\d+|[A-Z]+\b))?)?(?:[\s\n]+and[\s\n]+(?:\d+|[A-Z]+\b))?"
' m.Value is the whole matched string
' m.SubMatches(1) is the word immediately preceding the part number / alpha reference
' m.SubMatches(2) is the part number / alpha reference
re.IgnoreCase = False
re.Global = True
Dim txt As String
Dim bigString As String
bigString = ""
Dim allLongMatches As MatchCollection, m As Match
Dim partNameLastWord As String
Dim partReference As String
Dim partNameAndReference As String
Dim partsColl As New Collection
Dim partsList() As String
Dim i As Long
txt = ActiveDocument.Range.text
If re.Test(txt) Then
Set allLongMatches = re.Execute(txt)
Documents.Add DocumentType:=wdNewBlankDocument
For Each m In allLongMatches
Debug.Print m.Value, "Sbm 1 = " + m.SubMatches(1), "Sbm 2 = " + m.SubMatches(2), "Sbm 3 = " + m.SubMatches(3)
If InStr(bigString, LCase(m.SubMatches(1) + m.SubMatches(2))) = 0 _
And InStr(LCase(m.Value), "of claim " + m.SubMatches(2)) = 0 _
And InStr(LCase(m.SubMatches(2)), "fig") = 0 Then
bigString = bigString + LCase(m.Value)
partsColl.Add m.SubMatches(2) + ": " + m.Value
End If
Next m
End If
ReDim partsList(1 To partsColl.Count)
For i = 1 To partsColl.Count
partsList(i) = partsColl(i)
Next i
' BubbleSort (partsList())
' Instead of calling BubbleSort (partsList())
' I apparently still have to learn how to properly call methods I
' have written - for now I am just embedding it here:
Dim strTemp As String
' Dim i As Long
Dim j As Long
Dim lngMin As Long
Dim lngMax As Long
lngMin = LBound(partsList())
lngMax = UBound(partsList())
For i = lngMin To lngMax - 1
For j = i + 1 To lngMax
If partsList(i) > partsList(j) Then
strTemp = partsList(i)
partsList(i) = partsList(j)
partsList(j) = strTemp
End If
Next j
Next i
For i = 1 To partsColl.Count
Selection.InsertAfter (partsList(i))
Selection.InsertParagraphAfter
Next i
End Sub
Sub BubbleSort(arr)
Dim strTemp As String
Dim i As Long
Dim j As Long
Dim lngMin As Long
Dim lngMax As Long
lngMin = LBound(arr)
lngMax = UBound(arr)
For i = lngMin To lngMax - 1
For j = i + 1 To lngMax
If arr(i) > arr(j) Then
strTemp = arr(i)
arr(i) = arr(j)
arr(j) = strTemp
End If
Next j
Next i
End Sub
来自美国专利的示例输入。美国专利号6,293,874:
"第二柱44定位在距第一柱24足够的距离处,以允许使用者要求使用者在柱24,44之间的弯腰位置处向前弯腰。因此,使用者是定位成主要将他或她的臀部B朝向多个旋转臂56展开,所述多个旋转臂56可拆卸地安装在第二柱44上,其高度与使用者的臀部大致齐平。第二柱44可通过可拆卸的套环46和连接器螺栓或螺钉安装在平台12的表面上。
输出(只能很好地工作,因为数字是相同的长度 - 我想它实际上是排序"按字母顺序排列,"其中" 2"将在" 19之后出现, "例如):
' 12: surface of the platform 12
' 24: from the first post 24
' 24: position between the posts 24, 44
' 44: The second post 44
' 46: by a detachable collar 46
' 56: plurality of rotating arms 56
' B: his or her buttocks B
答案 0 :(得分:0)
我制作了一个笨重的解决方案,它通过自己创建一个单独的部件标识符数组,并与标识符数组id()As Long并行排序partsList()数组。现在将alpha标识符设置为零,并让它们渗透到顶部未排序;他们通常不足以担心按字母顺序排序。我不愿意将此标记为答案,因为我仍然希望看到有人会使用更优雅/直接的解决方案。
Sub ExtractPartsList()
Dim re As VBScript_RegExp_55.RegExp
Set re = New VBScript_RegExp_55.RegExp
re.pattern = "((?:[A-Z]*[a-z]+[\s\n]+){0,3})(?=[A-Z]*[a-z]+[\s\n]+(?:\d+\b|[A-Z]+\b))" + _
"(\b[A-Z]*[a-z]+[\s\n]+)(\b\d+\b'*|[A-Z]+\b'*)" + _
"((?:\,[\s\n]+(?:\d+|[A-Z]+\b))+(?:\,?[\s\n]+and[\s\n+](?:\d+|[A-Z]+\b))?)?(?:[\s\n]+and[\s\n]+(?:\d+|[A-Z]+\b))?"
' m.Value is the whole matched string
' m.SubMatches(1) is the word immediately preceding the part number / alpha reference
' m.SubMatches(2) is the part number / alpha reference
re.IgnoreCase = False
re.Global = True
Dim txt As String
Dim bigString As String
bigString = ""
Dim allLongMatches As MatchCollection, m As Match
Dim partNameLastWord As String
Dim partReference As String
Dim partNameAndReference As String
Dim partsColl As New Collection
Dim idColl As New Collection
' for now not using this variable:
' Dim referenceTextColl As New Collection
Dim partsList() As String
Dim id() As Long
' Dim referenceText() As String
' Dim partsListSorted() As String
Dim i As Long
txt = ActiveDocument.Range.text
If re.Test(txt) Then
Set allLongMatches = re.Execute(txt)
Documents.Add DocumentType:=wdNewBlankDocument
For Each m In allLongMatches
Debug.Print m.Value, "Sbm 1 = " + m.SubMatches(1), "Sbm 2 = " + m.SubMatches(2), "Sbm 3 = " + m.SubMatches(3)
If InStr(bigString, LCase(m.SubMatches(1) + m.SubMatches(2))) = 0 _
And InStr(LCase(m.Value), "of claim " + m.SubMatches(2)) = 0 _
And InStr(LCase(m.SubMatches(2)), "fig") = 0 Then
bigString = bigString + LCase(m.Value)
partsColl.Add m.SubMatches(2) + ": " + m.Value
idColl.Add (m.SubMatches(2))
' referenceTextColl.Add (m.Value)
' Selection.InsertAfter (m.SubMatches(2) + ": ")
' Selection.InsertAfter (m.Value)
' Selection.InsertParagraphAfter
End If
Next m
End If
ReDim partsList(1 To partsColl.Count)
ReDim id(1 To partsColl.Count)
' ReDim referenceText(1 To partsColl.Count)
For i = 1 To partsColl.Count
partsList(i) = partsColl(i)
id(i) = 0
' Deal with "prime" symbols #' and convert numeric identifiers to Long:
If IsNumeric(Replace(idColl(i), "'", "")) Then id(i) = CLng(Replace(idColl(i), "'", ""))
referenceText(i) = referenceTextColl(i)
Next i
'
' I apparently still have to learn how to properly call methods I
' have written - I am just embedding a bubble sort algorithm here instead:
Dim idTemp As String
Dim referenceTemp As String
Dim partsListLineTemp As String
' Dim i As Long
Dim j As Long
Dim lngMin As Long
Dim lngMax As Long
lngMin = LBound(partsList())
lngMax = UBound(partsList())
For i = lngMin To lngMax - 1
For j = i + 1 To lngMax
If id(i) > id(j) Then
idTemp = id(i)
partsList(i) = partsList(j)
id(i) = id(j)
partsList(j) = partsListLineTemp
id(j) = idTemp
End If
Next j
Next i
For i = 1 To partsColl.Count
Selection.InsertAfter (partsList(i))
Selection.InsertParagraphAfter
Next i
partsListLineTemp = partsList(i)
End Sub