解析大文本文件并将信息保存在表中

时间:2014-06-06 10:19:40

标签: regex vba ms-access text-parsing

我有一个大文本文件(已经是10Mb)。像这样的东西:

FESTWERT BMW_2
     LANGNAME "DFES / Gruppenstellvertreter für gemeinsame MIl Entprellung von DFC_DMTLmax"
   FUNKTION DFES 
     EINHEIT_W "-"
   WERT 253.25
END


KENNFELD CrCtl_StM.CrCtl_facPwrMaxaDem_MAP 6 6
   LANGNAME "GRA (Geschwindigkeits Regel Automat) - Zustandsautomat / leistungsabhängiger Faktor für max. Sollbeschleuniung"
   FUNKTION CrCtl_Gov 
   EINHEIT_X "km/h"
   EINHEIT_Y "kW"
   EINHEIT_W "-"
   ST/X   20.0000000000000000   50.0000000000000000   80.0000000000000000   120.0000000000000000   150.0000000000000000   200.0000000000000000   
   ST/Y   85.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
   ST/Y   92.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
   ST/Y   103.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
   ST/Y   110.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
   ST/Y   125.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
   ST/Y   132.0000000000000000
   WERT   1.0000000000000000   1.0000000000000000   1.0000000000000000   0.7500000000000000   0.5468750000000000   0.2031250000000000   
END

我想将这些文本结构保存在访问数据库的表中:

enter image description here

我正在使用正则表达式来读取每一行以创建我想要的结构,然后将其保存在数据库中(我在此文本文件中有许多结构形式)

我正在使用此代码:

Option Compare Database

Sub ImportDcmlDaten(path As String, ID As Long)
Dim fso As New FileSystemObject
Dim ts As TextStream
Dim nameknl, nameknf, namefknl, namefwr, lien, DCMfilename As String
Dim testid As Integer

Dim regknf As New regexp
Dim regstx As New regexp
Dim regend As New regexp
Dim regxnum As New regexp
Dim regsty As New regexp
Dim regwert As New regexp
Dim regwertnum As New regexp
Dim regknl As New regexp
Dim regname As New regexp
Dim regfknl As New regexp
Dim regfstwrt As New regexp
Dim rega2l As New regexp
Dim regprodat As New regexp
Dim rega2lhex As New regexp

Dim regtxt As New regexp
Dim regwtxt As New regexp

Dim regeinheitx As New regexp
Dim regx As New regexp
Dim regeinheity As New regexp
Dim regeinheitwert As New regexp
Dim regfunktion As New regexp

   Dim swknnf, swknl, swfst As Boolean

Dim matchkennfeld, matchstx, matchend, matchxnum, matchynum, matchsty, matchwert, matchwertnum, matchkennlinie, matchname, matchfestkennlinie, matchprodat, matchfstwert, matcha2lhex, matchtxt As MatchCollection
Dim stxnums(0 To 1000) As String
Dim wertnums(0 To 1000) As String
Dim stynums(0 To 1000) As String
Dim X As Integer
Dim mycollection As New Collection

   Dim db As DAO.Database
Dim qry As DAO.QueryDef

Set db = CurrentDb


DCMfilename = fso.GetFileName(path)
   Set ts = fso.OpenTextFile(path, ForReading)

Set qry = db.QueryDefs("Test_qr_emptyDCM")
qry.Parameters("fzg_ID").Value = ID
Set rs = qry.OpenRecordset



'On Error GoTo Errhandler

regknf.Pattern = "KENNFELD\s+([\w|\s]*)"
regname.Pattern = "[\w|\s]*"
regknl.Pattern = "KENNLINIE\s+([\w|\s]*)"
regfstwrt.Pattern = "FESTWERT\s+([\w|\s]*)"
regfknl.Pattern = "FESTKENNLINIE\s+([\w|\s]*)"
regstx.Pattern = "(ST/X)\s*(-?[\d]*(\.)?[\d]*\s*)+"
regend.Pattern = "(END)"
regxnum.Pattern = "-?\d{1,}\.{0,1}\d{0,}"
regsty.Pattern = "(ST/Y)\s*(-?[\d]*(\.)?[\d]*\s*)+"
regwert.Pattern = "\bWERT\b\s*(-?[\d]*(\.)?[\d]*\s*)+"
regprodat.Pattern = "(Datensatz:|Projekt:)[\s\w*,*]*[\\\w]*"
rega2lhex.Pattern = "[\\][\w]*"
regxnum.Global = True
'regwertnum.Pattern = "-?\d{1,}\.{0,1}\d{0,}"
regwertnum.Global = True

regeinheitx.Pattern = "EINHEIT_X\s+[\""?\w\/\s\-\_]*"
regeinheity.Pattern = "EINHEIT_Y\s+[\""?\w\/\s\_\-]*"
regeinheitwert.Pattern = "EINHEIT_W\s+[\""?\w\/\s\-\_]*"
regfunktion.Pattern = "FUNKTION\s+[\""?\w\/\s\-\_]*"

regtxt.Pattern = "\bTEXT\b\s+[\""?\w\/\s\-\_]*"
regwtxt.Pattern = "\s+[\""?\w\/\s\-\_]*"
Do While Not ts.AtEndOfStream

line = ts.ReadLine



'specifying von KENNFELD Block
Set matchkennfeld = regknf.Execute(line)
Set matchend = regend.Execute(line)
Set matchstx = regstx.Execute(line)
Set matchsty = regsty.Execute(line)
Set matchwert = regwert.Execute(line)
Set matchkennlinie = regknl.Execute(line)
Set matchfestkennlinie = regfknl.Execute(line)
Set matchfstwert = regfstwrt.Execute(line)
Set matchprodat = regprodat.Execute(line)

Set matcheinheitx = regeinheitx.Execute(line)
Set matcheinheity = regeinheity.Execute(line)
Set matcheinheitwert = regeinheitwert.Execute(line)
Set matchfunktion = regfunktion.Execute(line)

Set matchtxt = regtxt.Execute(line)

If matchprodat.Count <> 0 Then
Set matcha2lhex = rega2lhex.Execute(matchprodat.Item(0))
 DCMfilename = DCMfilename + "<>" + Mid(matcha2lhex.Item(0), 2)
  If rs.Fields(0) = 0 Then
       MsgBox "Hier darf man nicht eine neue DCM einfügen"
       Exit Sub
  Else
       'MsgBox DCMfilename
       '
  End If
End If

If matchkennfeld.Count <> 0 Then
Set nameknf = regname.Execute(Mid(Trim(matchkennfeld.Item(0)), 9))
  swknnf = True
  X = 0
  W = 0
End If

If matcheinheitx.Count <> 0 And (swknnf = True Or swknl = True) Then
 Einheitx = Mid(Trim(matcheinheitx.Item(0)), 11)
End If

If matcheinheity.Count <> 0 And (swknnf = True) Then
 EinheitY = Mid(Trim(matcheinheity.Item(0)), 11)
End If

If matcheinheitwert.Count <> 0 And (swknnf = True Or swknl = True Or swfst = True) Then
 Einheitwert = Mid(Trim(matcheinheitwert.Item(0)), 11)
End If

If matchfunktion.Count <> 0 And (swknnf = True Or swknl = True Or swfst = True) Then
 Funktion = Mid(Trim(matchfunktion.Item(0)), 9)
End If

If matchfstwert.Count <> 0 Then
Set namefwr = regname.Execute(Mid(Trim(matchfstwert.Item(0)), 9))
swfst = True
End If

If matchkennlinie.Count <> 0 Then
  Set nameknl = regname.Execute(Mid(Trim(matchkennlinie.Item(0)), 10))
  swknl = True
End If

If matchfestkennlinie.Count <> 0 Then
  Set namefknl = regname.Execute(Mid(Trim(matchkennlinie.Item(0)), 14))
  swknl = True
End If

If matchend.Count <> 0 Then
 If swknnf = True Then
   'DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten_info (EinheitX,EinheitY,Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitx) & "','" & Trim(EinheitY) & "','" & Trim(Einheitwert) & "','" & Trim(nameknf.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
   db.Execute ("INSERT INTO tb_DCM_Daten_info (EinheitX,EinheitY,Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitx) & "','" & Trim(EinheitY) & "','" & Trim(Einheitwert) & "','" & Trim(nameknf.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
 End If

 If swfst = True Then
   'DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten_info (Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitwert) & "','" & Trim(namefwr.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
   db.Execute ("INSERT INTO tb_DCM_Daten_info (Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitwert) & "','" & Trim(namefwr.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
 End If


 If swknl = True Then
    For K = 0 To X - 1
     ' MsgBox nameknl.Item(0) + ":" + stxnums(K) + ":" + wertnums(K)
    ' DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten (XValue,Wert,name,FzgID) VALUES ('" & stxnums(K) & "','" & wertnums(K) & "','" & Trim(nameknl.Item(0)) & "','" & ID & "');")
     db.Execute ("INSERT INTO tb_DCM_Daten (XValue,Wert,name,FzgID) VALUES ('" & stxnums(K) & "','" & wertnums(K) & "','" & Trim(nameknl.Item(0)) & "','" & ID & "');")
    Next K
    'DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten_info (EinheitX,EinheitY,Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitx) & "','" & Trim(EinheitY) & "','" & Trim(Einheitwert) & "','" & Trim(nameknl.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
    db.Execute ("INSERT INTO tb_DCM_Daten_info (EinheitX,EinheitY,Einheitwert,Name,Funktion,FzgID) VALUES('" & Trim(Einheitx) & "','" & Trim(EinheitY) & "','" & Trim(Einheitwert) & "','" & Trim(nameknl.Item(0)) & "','" & Trim(Funktion) & "','" & ID & "');")
 End If
swknnf = False
swknl = False
swfst = False
X = 0
W = 0
Y = 0
Erase stxnums
Erase wertnums
Erase stynums
End If

If matchstx.Count <> 0 And (swknnf = True Or swknl = True) Then

 Set matchxnum = regxnum.Execute(Mid(Trim(matchstx.Item(0)), 5))
 For Each M In matchxnum
   stxnums(X) = M
   X = X + 1
 Next M
 ' Wir haben ein Array voll von STX Werte

End If

If matchsty.Count <> 0 And swknnf = True Then
  Set matchynum = regxnum.Execute(Mid(Trim(matchsty.Item(0)), 5))
End If

If (matchwert.Count <> 0 Or matchtxt.Count <> 0) And swfst = True Then

If matchwert.Count <> 0 Then
 Set matchwertnum = regxnum.Execute(Mid(Trim(matchwert.Item(0)), 5))
End If

If matchtxt.Count <> 0 Then
 Set matchwertnum = regwtxt.Execute(Mid(Trim(matchtxt.Item(0)), 5))
End If
 For Each M In matchwertnum
   'DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten (Wert,name,FzgID) VALUES ('" & M & "','" & namefwr.Item(0) & "','" & ID & "');")
    db.Execute ("INSERT INTO tb_DCM_Daten (Wert,name,FzgID) VALUES ('" & M & "','" & namefwr.Item(0) & "','" & ID & "');")
 Next M
End If

If matchwert.Count <> 0 And swknnf = True Then
Set matchwertnum = regxnum.Execute(Mid(Trim(matchwert.Item(0)), 5))
For Each M In matchwertnum
  wertnums(W) = M
  W = W + 1
Next M
If W = X Then
For K = 0 To X - 1
 'MsgBox stxnums(K) + " " + matchynum(0) + " " + wertnums(K) + " " + nameknf.Item(0)
    'DoCmd.RunSQL ("INSERT INTO tb_DCM_Daten (XValue,YValue,Wert,name,FzgID) VALUES ('" & stxnums(K) & "','" & matchynum(0) & "','" & wertnums(K) & "','" & nameknf.Item(0) & "','" & ID & "');")
    db.Execute ("INSERT INTO tb_DCM_Daten (XValue,YValue,Wert,name,FzgID) VALUES ('" & stxnums(K) & "','" & matchynum(0) & "','" & wertnums(K) & "','" & nameknf.Item(0) & "','" & ID & "');")
 Next K
 W = 0
End If
End If

If matchwert.Count <> 0 And swknl = True Then
  Set matchwertnum = regxnum.Execute(Mid(Trim(matchwert.Item(0)), 5))
  For Each M In matchwertnum
     wertnums(W) = M
     W = W + 1
    Next M
End If
Loop
'DoCmd.RunSQL ("Update tb_KonzeptDaten  Set DCMFile=""" & DCMfilename & """ where (Konzept= " & ID & ")")
MsgBox "Die Daten sind Erfolgreich gespeichert"
Exit Sub

'Errhandler:
'MsgBox "An error has occurred. The macro will end."
 'hier musste ein Code sein, um die Vorherige Daten zu löschen
End Sub

但将此文件保存到数据库(超过一小时)需要很长时间才有更好的方法来完成这项工作? (仅解析文本文件而不保存在db中需要15秒)

3 个答案:

答案 0 :(得分:1)

您的问题很可能是大量的个人插入。

您可以使用RecordSet和交易BeginTransCommitTrans的组合,如此答案的答案中所述:https://stackoverflow.com/a/21992758/6206

答案 1 :(得分:1)

正如我在对该问题的评论中提到的,我建议解析原始文本文件并写出这样的临时CSV文件:

94172,,,253.25,"BMW_2",230
94173,20.000000,85.000000,1.000000,"CrCtl_StM",230
94174,20.000000,85.000000,1.000000,"CrCtl_StM",230

...然后使用VBA DoCmd.TransferText方法导入CSV文件。

使用Recordset来执行插入(正如其他答案所建议的)当然是可能的,但除非您在事务中包装整批插入,否则它仍然可能相当慢,并且这样做可能会导致解决“超出文件共享锁定计数”错误的问题。使用该方法时还可能存在其他烦恼(如重要文件膨胀)。

答案 2 :(得分:0)

删除使用SQL插入命令并将其替换为reocrdset。使用保持打开的reocrdset(与许多单独的插入命令相反)的速度要快很多倍。