我正在尝试创建一个Excel宏,它将维度值格式化为我们公司的格式。这样我们就可以轻松地将数据导入我们的系统,而无需手动完成数千个维度。我遇到了一些问题:
我们公司的Dimensions标准格式如下:
每个值最多可以有3个参数
Attribute1:Value1:Unit1;Attribute2:Value2:Unit2;Attribute3:Value3:Unit3
示例: 1“L x 2”W x 3“H 转换为长度:1:in;宽度:2:in;高度:3 :在
可能与之一起使用的值是:
我在过去一年中提到的一些变化包括:
产品尺寸的一小部分样本(注意不一致):
3 3/4" Width x 2 1/2" Height
L 4 3/4" x W 1 1/2" x H 3"
3 1/2" W x 2 1/8" H x 2 7/8" D
3 5/8" W x 2 1/2" H x 5/8" Depth
3 3/4" W x 1" H
1 1/4" W x 3 1/4" H
2 3/8" Diameter
3" W x 2 1/2" H
2" W x 3" H
2 1/2" W x 2" H
1 3/8" W x 2 1/8" H
3 1/2" W x 3 1/2" H
1 1/2" W x 3" H
2" W x 1 7/8" H x 1 1/2" D
4 3/4" W x 3 1/2" H
4 3/4" W x 4" H x 1 1/4" D
3 1/2" W x 3 1/2" H x 3 1/2" D
3-1/2" W x 2-3/4" H
3.5" W x 4" H
3" H
3 1/4" W x 2 1/4" H
4 7/16" W x 6 1/4" H
3 1/4" W x 3 1/4" H
5" W x 7" H
到目前为止,我已经提出了正则表达式(\d+(.| |/|)\d+((/)\d+|)|\d+)
,它似乎可以获取所有数字,但我不太确定如何找到所有不同的属性和单位。我认为唯一可行的就是lookbehinds&前瞻,但我对这种正则表达式的想法并不够精通。
问题1: 正则表达式是执行此任务的最佳方式还是有更好的方法?
问题2: 最终的问题是,我如何完成这项复杂的任务,或者甚至可以远程使用vba?
答案 0 :(得分:2)
您可以创建解析器/渲染,下面的示例显示了如何在基于RegEx的EBNF解析器中实现它,将代码放入标准VBA模块中:
Option Explicit
Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object
Sub TestParserRender()
Dim sScr As String
Dim sResult As String
sScr = ReadTextFile(ThisWorkbook.Path & "\Source.txt", -2)
sResult = Parse(sScr)
WriteTextFile sResult, ThisWorkbook.Path & "\Result.txt", -1
End Sub
Function Parse(ByVal sSample As String) As String
' Init
sBuffer = sSample
Set oTokens = CreateObject("Scripting.Dictionary")
Set oRegEx = CreateObject("VBScript.RegExp")
With oRegEx
.Global = True
.MultiLine = True
.IgnoreCase = True
' Cast variations in attributes and units
.Pattern = "\bL\.(?=\s|$)|\bL\b"
sBuffer = .Replace(sBuffer, "Length")
.Pattern = "\bW\.(?=\s|$)|\bW\b"
sBuffer = .Replace(sBuffer, "Width")
.Pattern = "\bH\.(?=\s|$)|\bH\b|\bHeigth\b"
sBuffer = .Replace(sBuffer, "Height")
.Pattern = "\bRound\b"
sBuffer = .Replace(sBuffer, "Circumference")
.Pattern = "\bD\.(?=\s|$)|\bD\b|\bDeep\b"
sBuffer = .Replace(sBuffer, "Depth")
.Pattern = "\bDia\.(?=\s|$)|\bDiameter\b"
sBuffer = .Replace(sBuffer, "Dia")
.Pattern = "\bThick\b"
sBuffer = .Replace(sBuffer, "Thickness")
.Pattern = "(?:\""|'')(?=\s|$)"
sBuffer = .Replace(sBuffer, " in")
.Pattern = "\binch\b|\binches\b|\bin\.(?=\s|$)"
sBuffer = .Replace(sBuffer, "in")
.Pattern = "\bfeet\b"
sBuffer = .Replace(sBuffer, "ft")
' Tokenize instances
.Pattern = "<\d+[savedpun]>"
Tokenize "e" ' Escape reserved sequence
.Pattern = "\b(?:\d+((?:[ -]\d+)?(?:\/|\.)\d+)?)(?=\D)"
Tokenize "n" ' Number
.Pattern = "\b(?:Length|Width|Height|Arc|Area|Circumference|Depth|Dia|Thickness)\b"
Tokenize "a" ' Attribute
.Pattern = "\b(?:in|ft)\b"
Tokenize "u" ' Units
.Pattern = "<\d+n>[ \t]*<\d+u>"
Tokenize "v" ' Number + Unit = Value
.Pattern = "(<\d+v>)([ \t]*)(<\d+a>)"
sBuffer = .Replace(sBuffer, "$3$2$1") ' Swap Value + Attribute = Attribute + Value
.Pattern = "<\d+a>[ \t]*<\d+v>"
Tokenize "p" ' Attribute + Value = Parameter
.Pattern = "^[ \t]*<\d+p>(?:[ \t]*X[ \t]*<\d+p>){0,2}[ \t]*$"
Tokenize "d" ' Parameter X Parameter X Parameter = Dimension
.MultiLine = False
.Pattern = "^(?:\r\n)*<\d+d>(?:(?:\r\n)+<\d+d>)*(?:\r\n)*$"
Tokenize "s" ' Dimension * N times = Structure
.Pattern = "^<\d+s>$" ' Top level Structure single element
If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
Parse = Retrieve(sBuffer) ' Render if success
Else
MsgBox "Parsing failed"
.Pattern = "^([\s\S]+?)(<\d+[savedpun]>)"
sBuffer = .Replace(sBuffer, "[$1]$2") ' Put failed from begin in brackets
.Pattern = "(<\d+[savedpun]>)([\s\S]+?)(?=<\d+[savedpun]>|$)"
sBuffer = .Replace(sBuffer, "$1[$2]") ' Put failed between tokens in brackets
.Pattern = "\[\r\n\]"
sBuffer = .Replace(sBuffer, vbCrLf) ' Recover dummy new lines in brackets
.Global = False
.Pattern = "<\d+[savedpun]>" ' Retrieve the rest tokens
Do
With .Execute(sBuffer)
If .Count = 0 Then Exit Do
sBuffer = Replace(sBuffer, .Item(0).value, oTokens(.Item(0).value))
End With
Loop
Parse = sBuffer
End If
End With
Set oTokens = Nothing
Set oRegEx = Nothing
End Function
Private Sub Tokenize(sType)
Dim aContent() As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String
With oRegEx.Execute(sBuffer)
If .Count = 0 Then Exit Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey = "<" & oTokens.Count & sType & ">"
oTokens(sKey) = .value
aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
lCopyIndex = .FirstIndex + .Length + 1
End With
Next
End With
sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)
End Sub
Private Function Retrieve(sTokenKey As String) As String
Dim sTokenValue As String
Dim aTokens() As String
Dim i As Long
Dim aContent() As String
sTokenValue = oTokens(sTokenKey)
Select Case Left(Right(sTokenKey, 2), 1)
Case "s", "d"
aTokens = Split(sTokenValue, "<")
ReDim aContent(UBound(aTokens) - 1)
For i = 1 To UBound(aTokens)
aContent(i - 1) = Retrieve("<" & Split(aTokens(i), ">", 2)(0) & ">")
Next
Retrieve = Join(aContent, IIf(Left(Right(sTokenKey, 2), 1) = "s", vbCrLf, ";"))
Case "p", "v"
aTokens = Split(sTokenValue, "<")
Retrieve = _
Retrieve("<" & Split(aTokens(1), ">", 2)(0) & ">") & _
":" & _
Retrieve("<" & Split(aTokens(2), ">", 2)(0) & ">")
Case "a", "u", "n"
Retrieve = sTokenValue
End Select
End Function
Function ReadTextFile(sPath As String, lFormat As Long) As String
' lFormat -2 - System default, -1 - Unicode, 0 - ASCII
With CreateObject("Scripting.FileSystemObject").OpenTextFile(sPath, 1, False, lFormat)
ReadTextFile = ""
If Not .AtEndOfStream Then ReadTextFile = .ReadAll
.Close
End With
End Function
Sub WriteTextFile(sContent As String, sPath As String, lFormat As Long)
With CreateObject("Scripting.FileSystemObject").OpenTextFile(sPath, 2, True, lFormat)
.Write (sContent)
.Close
End With
End Sub
将样本作为ANSI或Unicode保存到与Excel文件相同的文件夹中的文本文件Source.txt
,然后运行TestParserRender()
。输出将保存到文本文件Result.txt
。处理从解析开始。属性和单位的变化首先由RegEx替换。然后匹配RegEx图案部件折叠成令牌。错误的值+属性序列通过RegEx子匹配进行更正,通过替换进行交换。在解析结束时,应该保留单个顶级结构标记,否则引发错误。如果解析失败,则将无法识别的部分放入输出中的大括号中。如果它成功,那么使用渲染检索内容的反向过程将一直持续到最后一个标记。
大纲中的解析算法可以用下面的EBNF语法表示(简化,替换未显示):
structure ::= ( "\n\r" )* dimension ( ( "\n\r" )+ dimension )* ( "\n\r" )*
dimension ::= ( " " | "\t" )* parameter ( ( " " | "\t" )+ "X" ( " " | "\t" )+ parameter )? ( ( " " | "\t" )+ "X" ( " " | "\t" )+ parameter )? ( " " | "\t" )*
parameter ::= attribute ( " " | "\t" )* value
attribute ::= "\b" ( "Length" | "Width" | "Height" | "Arc" | "Area" | "Circumference" | "Depth" | "Dia" | "Thickness" ) "\b"
value ::= number ( " " | "\t" ) unit
number ::= digits ( ( ( ( ' ' | '-' ) digits )? '/' | '.' ) digits )?
digits ::= digit+
digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
unit ::= "\b" ( "in" | "ft" ) "\b"
和相关的diagram:
您提供的样本的输出如下:
Width:3 3/4:in;Height:2 1/2:in
Length:4 3/4:in;Width:1 1/2:in;Height:3:in
Width:3 1/2:in;Height:2 1/8:in;Depth:2 7/8:in
Width:3 5/8:in;Height:2 1/2:in;Depth:5/8:in
Width:3 3/4:in;Height:1:in
Width:1 1/4:in;Height:3 1/4:in
Dia:2 3/8:in
Width:3:in;Height:2 1/2:in
Width:2:in;Height:3:in
Width:2 1/2:in;Height:2:in
Width:1 3/8:in;Height:2 1/8:in
Width:3 1/2:in;Height:3 1/2:in
Width:1 1/2:in;Height:3:in
Width:2:in;Height:1 7/8:in;Depth:1 1/2:in
Width:4 3/4:in;Height:3 1/2:in
Width:4 3/4:in;Height:4:in;Depth:1 1/4:in
Width:3 1/2:in;Height:3 1/2:in;Depth:3 1/2:in
Width:3-1/2:in;Height:2-3/4:in
Width:3.5:in;Height:4:in
Height:3:in
Width:3 1/4:in;Height:2 1/4:in
Width:4 7/16:in;Height:6 1/4:in
Width:3 1/4:in;Height:3 1/4:in
Width:5:in;Height:7:in
BTW我在VBA JSON parser中使用了相同的方法。