我有几千个单元格,需要从中提取文档编号。单元格里的文字没有固定格式,但每个文档编号都以"DOC#-"开头。例如:
B2:
This is an example cell showing a doc number DOC#-12351-2432-1b and a second document DOC#-2342-RTF-DD-09, there may be several more or only one.
我想找到一种方法,把所有文档编号逐个提取出来,分别放到单独的单元格、行等位置。最好的方法是什么?
目前我有一些非常粗糙的公式。
=MID($B2,FIND("DOC#-",$B2,1),FIND(" ",$B2,FIND("DOC#-",$B2,1))-FIND("DOC#-",$B2,1))
其思路是:先找到第一个文档编号的起始位置,再找到它后面的第一个空格,然后用 MID 返回两者之间的文档编号。
我想 VBA 可能是解决这个问题的办法,但我不太清楚该怎么写。
答案 0 :(得分:3)
选择您要处理的单元格并运行这个小宏:
Sub qwerty()
    ' For every cell in the current selection, finds each "DOC#-..." token in the
    ' cell's text and writes the tokens into the cells to its right, one token per
    ' column (first token at Offset(0, 1), second at Offset(0, 2), and so on).
    '
    ' A token ends at the first space or comma that follows it. The prefix is
    ' matched case-insensitively and is always written back as "DOC#-".
    ' Cells holding an error value are skipped.
    Const PREFIX As String = "DOC#-"

    Dim r As Range
    Dim ary() As String     ' text pieces that follow each occurrence of PREFIX
    Dim bry() As String     ' one piece split on spaces; element 0 is the number
    Dim i As Long, V As String, K As Long

    ' Selection can be a shape, chart, etc.; only a Range has cells to process.
    If TypeName(Selection) <> "Range" Then Exit Sub

    For Each r In Selection.Cells
        ' Replace() raises "Type mismatch" on error values such as #N/A.
        If Not IsError(r.Value) Then
            ' Treat commas as spaces so that either one terminates a number.
            V = Replace(CStr(r.Value), ",", " ")
            ary = Split(V, PREFIX, -1, vbTextCompare)
            K = 1
            ' Element 0 is the text before the first prefix, so start at 1.
            For i = 1 To UBound(ary)
                bry = Split(ary(i), " ")
                ' Split("") yields an empty array (UBound = -1); this happens when
                ' the text ends with the prefix or two prefixes are adjacent.
                If UBound(bry) >= 0 Then
                    ' A prefix followed directly by a space carries no number.
                    If Len(bry(0)) > 0 Then
                        r.Offset(0, K).Value = PREFIX & bry(0)
                        K = K + 1
                    End If
                End If
            Next i
        End If
    Next r
End Sub
它以空格或逗号作为文档编号的结束标志。以下是输入/输出的示例:
答案 1 :(得分:0)
为了提高速度,可以使用 Variant 数组配合正则表达式(RegExp)的方法:
Sub GetDoc()
    ' Extracts every "DOC#-..." token from the used part of column A of the
    ' active sheet and writes the tokens for each row into columns B onward
    ' (B = first match, C = second match, ...).
    '
    ' The source values are read into a Variant array and the results are written
    ' back in a single assignment, which avoids per-cell worksheet access.
    '
    ' Output always covers B:CV (99 columns) for the processed rows; cells with
    ' no corresponding match are cleared. Matches beyond the 99th in a row are
    ' dropped. Column A itself is not written to.
    Const MAX_DOCS As Long = 99

    Dim rngSrc As Range
    Dim X As Variant            ' source values from column A
    Dim Y() As Variant          ' extracted tokens, one row per source row
    Dim objRegex As Object
    Dim objRegexMC As Object
    Dim objRegexM As Object
    Dim lngCnt As Long
    Dim lngCnt2 As Long

    Set rngSrc = Range([a1], Cells(Rows.Count, "A").End(xlUp))

    ' Range.Value is a scalar (not an array) for a single cell, so wrap it to
    ' keep the loop below working when only A1 holds data.
    If rngSrc.Cells.Count = 1 Then
        ReDim X(1 To 1, 1 To 1)
        X(1, 1) = rngSrc.Value
    Else
        X = rngSrc.Value
    End If

    ReDim Y(1 To UBound(X, 1), 1 To MAX_DOCS)

    Set objRegex = CreateObject("vbscript.regexp")
    With objRegex
        .Global = True
        .IgnoreCase = True
        .Pattern = "DOC#[-\w]+"
        For lngCnt = 1 To UBound(X, 1)
            ' Error values (#N/A, #VALUE!, ...) cannot be converted to text.
            If Not IsError(X(lngCnt, 1)) Then
                Set objRegexMC = .Execute(CStr(X(lngCnt, 1)))
                lngCnt2 = 0
                For Each objRegexM In objRegexMC
                    ' Stop before running past the last output column.
                    If lngCnt2 >= MAX_DOCS Then Exit For
                    lngCnt2 = lngCnt2 + 1
                    Y(lngCnt, lngCnt2) = objRegexM.Value
                Next objRegexM
            End If
        Next lngCnt
    End With

    ' Write results starting at B1 so column A (and any formulas in it) is untouched.
    [b1].Resize(UBound(Y, 1), UBound(Y, 2)).Value = Y
End Sub