所以这是一个我坚持的问题,我想得到一些帮助/想法,我该如何解决这个问题。
问题是我们得到一行字符串,其中包含这种可能格式的数据
输入样本#1:
100, 200, 300 Route 45
#1的输出
100, 200, 300 Route 45
100 Route 45
200 Route 45
300 Route 45
输入样本#2
1000 Wildforest Drive; 2000 Wildridge Circle
#2的输出:
1000 Wildforest Drive; 2000 Wildridge Circle
1000 Wildforest Drive
2000 Wildridge Circle
输入样本#3
100-107 and 109 Grove Hill Drive, 400-418, 420, 422, 424, 426, 428, 430, 432, 434 and 436-441 Olive Branch Way
#3的输出
100-107 Grove Hill Drive and 109 Grove Hill Drive, 400-418, 420, 422, 424, 426, 428, 430, 432, 434 and 436-441 Olive Branch Way
100-107 Grove Hill Drive
109 Grove Hill Drive
400-418
420, 422, 424, 426, 428, 430, 432, 434 and 436-441 Olive Branch Way
422 Olive Branch Way
424 Olive Branch Way
426 Olive Branch Way
428 Olive Branch Way
430 Olive Branch Way
434 Olive Branch Way
436-411 Olive Branch Way
我的尝试
Dim frowI As Long, i As Long, j As Long, frowO As Long, m As Long
Dim cet, fet, addR As String, stName As String
Sub Clean_Data()
frowI = INP.Range("A" & Rows.Count).End(xlUp).Row
For i = 2 To frowI
frowO = frowO + 1
addR = INP.Range("B" & i)
OUT.Range("C" & frowO) = addR
addR = Replace(addR, "and", ",")
Debug.Print addR
cet = Split(addR, ";")
For j = LBound(cet) To UBound(cet)
If InStr(cet(j), ",") > 0 Then
fet = Split(cet(j), ",")
For m = LBound(fet) To UBound(fet)
fet(m) = Trim(fet(m))
frowO = frowO + 1
OUT.Range("C" & frowO) = fet(m) & " " & stName
Next m
End If
Next j
Next i
End Sub
问题是我找不到从字符串中获取街道名称的方法。
感谢任何帮助。
答案 0 :(得分:2)
看看下面的内容,它使用正则表达式首先搜索字符串,然后搜索街道地址,然后再搜索建筑物编号。
有人可能能够提出更好的RegExp,但这在我的测试中有效。
Option Explicit
Sub Clean_Data()
Dim RegExStreet As Object, RegExNo As Object, MatchesStreet As Object, MatchesNo As Object
Dim rng As Range
Dim nme As String, tmp As String
Dim i As Long
Dim c, no, street
Set RegExStreet = CreateObject("vbscript.regexp")
Set RegExNo = CreateObject("vbscript.regexp")
With RegExStreet
.IgnoreCase = True
.Global = True
.Pattern = "([a-z]+\s[a-z]+\s[a-z]+|[a-z]+\s[a-z]+|[a-z]+\s\d+)"
End With
With RegExNo
.IgnoreCase = True
.Global = True
.Pattern = "(\d\-|\d|\w\d)+"
End With
With INP
Set rng = .Range(.Cells(1, 2), .Cells(.Cells(.Rows.Count, 2).End(xlUp).Row, 2))
End With
i = 2
For Each c In rng
nme = vbNullString
tmp = vbNullString
Set MatchesStreet = RegExStreet.Execute(Replace(Replace(c.Value2, " and ", vbNullString, compare:=vbTextCompare), " et al", vbNullString, compare:=vbTextCompare))
If MatchesStreet.Count > 0 Then
tmp = c.Value2
OUT.Cells(i, 3).Value2 = tmp
i = i + 1
For Each street In MatchesStreet
nme = street
Set MatchesNo = RegExNo.Execute(Left(tmp, InStr(1, tmp, street) - 1))
If MatchesNo.Count > 0 Then
For Each no In MatchesNo
OUT.Cells(i, 3).Value2 = no & " " & nme
i = i + 1
Next no
End If
tmp = Right(tmp, Len(tmp) - InStr(1, tmp, street))
Next street
End If
Next c
End Sub
另外,作为旁注,除非你真的需要,否则不要在变量之外声明变量。这可能会导致错误
答案 1 :(得分:1)
我发布这个,因为我的生命花了几分钟写。这不是我最优雅的作品。汤姆的答案可能更好,如果我更好地理解正则表达式,那就是我写它的方式。
Dim fRowO As Long
Dim strNum As String, strAddr As String, wrdaddress As String
Dim cl, wrds, wrd, nums, n
fRowO = 1
For Each cl In Range("A1:A3")
cl = Replace(cl, " and ", " , ")
cl = Replace(cl, ";", ",")
wrds = Split(cl, ",")
strNum = ""
strAddr = ""
For Each wrd In wrds
wrd = Trim(wrd)
If LCase(wrd) Like "*[a-z]*" Then
wrdaddress = Mid(wrd, InStr(wrd, " ") + 1, Len(wrd) - InStr(wrd, " ") + 1)
strNum = strNum & Left(wrd, InStr(wrd, " ") - 1)
strAddr = wrdaddress
nums = Split(strNum, ";")
For Each n In nums
If n <> "" Then
Cells(fRowO, 2) = n & " " & strAddr
fRowO = fRowO + 1
End If
Next
strNum = ""
strAddr = ""
Else
strNum = strNum & wrd & ";"
End If
Next
Next