我有一些非常大的文件要重新格式化,需要几个小时而不是几分钟。我试图加快它的速度。
输入文件有数亿行,大小为 7 - 9 GB。
我怎样才能加快速度?感谢
示例输入文件内容......
# FIELD_RECORD_NO Col: 1 - 15 Decs: 3 Mult: 1.000000 MEAN
# SHOTLINE_NUMBER Col: 16 - 30 Decs: 3 Mult: 1.000000 MEAN
# SHOT_POINT_NO Col: 31 - 45 Decs: 3 Mult: 1.000000 MEAN
# RECEIVERLINE_NUMBER Col: 46 - 60 Decs: 3 Mult: 1.000000 MEAN
# FIELD_STATION_NUMBER Col: 61 - 75 Decs: 3 Mult: 1.000000 MEAN
# CHANNEL_NO Col: 76 - 90 Decs: 3 Mult: 1.000000 MEAN
1.000 5177.000 2006.000 2001.000 5106.000 1.000
1.000 5177.000 2006.000 2001.000 5107.000 2.000
1.000 5177.000 2006.000 2001.000 5109.000 3.000
1.000 5177.000 2006.000 2001.000 5110.000 4.000
1.000 5177.000 2006.000 2001.000 5111.000 5.000
1.000 5177.000 2006.000 2001.000 5112.000 6.000
1.000 5177.000 2006.000 2001.000 5113.000 7.000
1.000 5177.000 2006.000 2001.000 5115.000 8.000
2.000 5177.000 2006.000 2001.000 5106.000 1.000
2.000 5177.000 2006.000 2001.000 5108.000 2.000
2.000 5177.000 2006.000 2001.000 5109.000 3.000
2.000 5177.000 2006.000 2001.000 5110.000 4.000
2.000 5177.000 2006.000 2001.000 5112.000 5.000
2.000 5177.000 2006.000 2001.000 5113.000 6.000
2.000 5177.000 2006.000 2001.000 5115.000 7.000
示例输出文件内容...
H00 SPS format version number SPS 2.1;
X 11 5177.00 2006.001 1 21 2001.00 5106.00 5107.00-
X 11 5177.00 2006.001 3 71 2001.00 5109.00 5113.00-
X 11 5177.00 2006.001 8 81 2001.00 5115.00 5115.00-
X 21 5177.00 2006.001 1 11 2001.00 5106.00 5106.00-
X 21 5177.00 2006.001 2 41 2001.00 5108.00 5110.00-
X 21 5177.00 2006.001 5 61 2001.00 5112.00 5113.00-
X 21 5177.00 2006.001 7 71 2001.00 5115.00 5115.00-
...代码
Option Strict On
Public Class Main
''' <summary>
''' Prompts for a Vista header export and converts it into an XPS relation file
''' named "&lt;input&gt;.xps", collapsing runs of consecutive receiver points into
''' single records.
''' </summary>
''' <remarks>
''' Lines containing "#" are treated as headers and skipped. Every other non-blank
''' line must yield at least five dot-separated columns once its spaces are removed
''' (FFID, shot line, shot point, receiver line, receiver point).
''' A record is emitted through WriteXPS when the FFID or receiver line changes,
''' when the receiver point changes by anything other than +1, and once more at
''' end of file for the segment that is still being accumulated.
''' </remarks>
Public Sub ImportVistaHeaderFile()
    Dim stime As DateTime = DateTime.Now
    Dim Readcnt As Integer = 0   ' every line read, headers included
    Dim ChCnt As Integer = 0     ' channel counter; restarted when the FFID changes

    ' "0" doubles as the "nothing seen yet" marker for the previous-line state
    ' and as the "no run start recorded" marker for FRP.
    Dim OldFFID As String = "0"
    Dim OldSL As String = "0"
    Dim OldSP As String = "0"
    Dim OldRL As String = "0"
    Dim OldRP As String = "0"
    Dim FRP As String = "0"

    Main.OpenFileDialog1.Filter = "Text Files (*.asc)|*.asc"
    ' Nothing to convert when the user cancels the dialog.
    If Main.OpenFileDialog1.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
        Return
    End If

    Dim inputFile As String = Main.OpenFileDialog1.FileName
    Dim XPSFile As String = inputFile & ".xps"
    Main.ToolStripStatusLabel1.Text = inputFile

    ' Create (or truncate) the output and write the header; WriteXPS appends to it later.
    Using header As System.IO.StreamWriter = My.Computer.FileSystem.OpenTextFileWriter(XPSFile, False)
        header.WriteLine("H00 SPS format version number SPS 2.1;")
    End Using
    Main.ToolStripStatusLabel3.Text = XPSFile
    Main.Refresh()

    ' A single reader, disposed even if a malformed line throws.
    Using fileReader As System.IO.StreamReader = My.Computer.FileSystem.OpenTextFileReader(inputFile)
        Dim line As String = fileReader.ReadLine()
        While line IsNot Nothing
            Readcnt += 1
            ' Skip header lines and blank lines (a blank line has no columns to index).
            If Not line.Contains("#") AndAlso line.Trim().Length > 0 Then
                ' With the spaces removed, splitting on "." leaves each integer part
                ' prefixed by the previous column's decimals; TrimStart("0"c) strips
                ' that prefix when the decimals are all zeros, as in the sample data.
                Dim vals() As String = line.Replace(" ", "").Split("."c)
                ChCnt += 1
                Dim FFID As String = vals(0)
                Dim SL As String = vals(1).TrimStart("0"c)
                Dim SP As String = vals(2).TrimStart("0"c)
                Dim RL As String = vals(3).TrimStart("0"c)
                Dim RP As String = vals(4).TrimStart("0"c)

                If CDbl(OldFFID) = 0 Then
                    ' First data line: seed the previous-line state.
                    OldFFID = FFID
                    OldSL = SL
                    OldSP = SP
                    OldRL = RL
                    OldRP = RP
                End If

                If FFID <> OldFFID Then
                    ' New FFID: close the previous segment and restart channel numbering.
                    WriteVistaSegment(OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, XPSFile)
                    OldFFID = FFID
                    OldSL = SL
                    OldSP = SP
                    OldRL = RL
                    OldRP = RP
                    ChCnt = 1
                    FRP = "0"
                ElseIf SL <> OldSL OrElse SP <> OldSP Then
                    ' Same FFID but a different shot line/point: reported, nothing is written.
                    MsgBox("Duplicate FFID is not supported by SEG XPS files. FFID " & OldFFID & " SP " & OldSL & OldSP & " SP " & SL & SP)
                    OldSL = SL
                    OldSP = SP
                    OldRL = RL
                    OldRP = RP
                    FRP = "0"
                ElseIf RL <> OldRL Then
                    ' New receiver line: close the previous segment.
                    WriteVistaSegment(OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, XPSFile)
                    OldRL = RL
                    OldRP = RP
                    FRP = "0"
                ElseIf CDbl(RP) = CDbl(OldRP) + 1 Then
                    ' Consecutive receiver point: extend the run, remembering where it began.
                    If CDbl(FRP) = 0 Then
                        FRP = CStr(CDbl(RP) - 1)
                    End If
                    OldRP = RP
                ElseIf RP <> OldRP Then
                    ' Receiver gap: close the run that ended on the previous line.
                    WriteVistaSegment(OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, XPSFile)
                    OldRP = RP
                    FRP = "0"
                End If
            End If
            line = fileReader.ReadLine()
        End While
    End Using

    ' The segment still being accumulated at end of file has not been written yet.
    If ChCnt > 0 Then
        WriteVistaSegment(OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt, XPSFile)
    End If

    Main.ToolStripStatusLabel1.Text = ""
    Main.ToolStripStatusLabel2.Text = ""
    Main.ToolStripStatusLabel3.Text = ""
    Dim etime As DateTime = DateTime.Now
    MsgBox(DateDiff(DateInterval.Second, stime, etime) & " " & Readcnt)
End Sub

''' <summary>
''' Derives the first/last channel of a finished receiver run and hands the record to WriteXPS.
''' </summary>
''' <param name="firstRp">First receiver point of the run, or "0" when the run is a single point.</param>
''' <param name="lastRp">Last receiver point of the run.</param>
''' <param name="lastChannel">Channel number of the run's last receiver point.</param>
Private Sub WriteVistaSegment(ffid As String, sl As String, sp As String, rl As String,
                              firstRp As String, lastRp As String,
                              lastChannel As Integer, xpsFile As String)
    If CDbl(firstRp) = 0 Then
        ' No run start was recorded, so the run starts and ends on the same point.
        firstRp = lastRp
        If CDbl(firstRp) = 0 Then
            MsgBox("FRP = 0")
        End If
    End If
    Dim lastCh As String = CStr(lastChannel)
    ' The run spans (lastRp - firstRp) extra channels before the last one.
    Dim firstCh As String = CStr(CDbl(lastCh) - (CDbl(lastRp) - CDbl(firstRp)))
    WriteXPS(ffid, sl, sp, firstCh, lastCh, rl, firstRp, lastRp, xpsFile)
End Sub
代码..
''' <summary>
''' Appends one "X" record to the XPS file and, for every FFID that is a multiple
''' of 10, updates the status bar.
''' </summary>
''' <param name="FFID">Field record number; must be numeric text. Right-aligned to 14 characters.</param>
''' <param name="SL">Shot line, right-aligned to 8 characters.</param>
''' <param name="SP">Shot point, right-aligned to 7 characters.</param>
''' <param name="FCH">First channel of the run, right-aligned to 5 characters.</param>
''' <param name="LCH">Last channel of the run, right-aligned to 5 characters.</param>
''' <param name="RL">Receiver line, right-aligned to 7 characters.</param>
''' <param name="FRP">First receiver point of the run, right-aligned to 7 characters.</param>
''' <param name="LRP">Last receiver point of the run, right-aligned to 7 characters.</param>
''' <param name="XPSFile">Path of the output file; the record is appended to it.</param>
Private Sub WriteXPS(FFID As String, SL As String, SP As String, FCH As String, LCH As String, RL As String, FRP As String, LRP As String, XPSFile As String)
    ' Option Strict On does not allow arithmetic directly on a String, so convert once.
    Dim ffidValue As Double = CDbl(FFID)
    If ffidValue Mod 10 = 0 Then
        Me.ToolStripStatusLabel2.Text = "Processing Files: " & (ffidValue + 1) & "-" & (ffidValue + 10)
        Me.Refresh()
    End If

    Dim record As String = "X," & FFID.PadLeft(14) & "1," & SL.PadLeft(8) & ".00," & SP.PadLeft(7) & ".001," &
                           FCH.PadLeft(5) & LCH.PadLeft(5) & "1," & RL.PadLeft(7) & ".00," &
                           FRP.PadLeft(7) & ".00," & LRP.PadLeft(7) & ".00-"

    ' Open the file exactly once; Using closes it even if the write throws.
    Using outfile As System.IO.StreamWriter = My.Computer.FileSystem.OpenTextFileWriter(XPSFile, True)
        outfile.WriteLine(record)
    End Using
End Sub
答案 0 :(得分:2)
我建议首先使用性能分析工具(profiler)来分析您的应用程序。
这应该告诉你性能问题在哪里。
注意:两家供应商都提供试用版。
我还建议找到读取和写入文件所需的经过时间而不进行任何计算(即拆分行并将字段转换为小数)。如果这与计算的当前经过时间接近,则不太可能使其运行得更快。
答案 1 :(得分:0)
我通过让输出文件在整个处理过程中保持打开状态,解决了主要的性能问题。
此版本的代码读取37,120,000 +行,并在1004秒内输出440,000 +行。
感谢所有人的帮助。
最好的问候
Option Strict On
''' <summary>
''' Converts a Vista header export into an XPS relation file, keeping the output
''' file open for the whole run instead of reopening it for every record.
''' </summary>
Module MainModule

    ''' <summary>
    ''' Prompts for an input file, writes "&lt;input&gt;.xps" next to it and a ".log"
    ''' file with totals, duplicate receiver points and the elapsed time.
    ''' </summary>
    ''' <remarks>
    ''' Lines containing "#" are treated as headers and skipped. Every other non-blank
    ''' line must yield at least five dot-separated columns once its spaces are removed
    ''' (FFID, shot line, shot point, receiver line, receiver point).
    ''' A record is written when the FFID or receiver line changes, when the receiver
    ''' point repeats or changes by anything other than +1, and once at end of file
    ''' for the segment still being accumulated.
    ''' </remarks>
    Public Sub ImportVistaHeaderFile()
        Dim stime As DateTime = DateTime.Now
        Dim Readcnt As Long = 0      ' data lines read (headers excluded)
        Dim TRcnt As Long = 0        ' traces covered by the records written
        Dim ChCnt As Long = 0        ' channel counter; restarted when the FFID changes
        Dim Duplicates As Long = 0   ' repeated receiver points

        ' "0" doubles as the "nothing seen yet" marker for the previous-line state
        ' and as the "no run start recorded" marker for FRP.
        Dim FFID As String = "0"
        Dim OldFFID As String = "0"
        Dim OldSL As String = "0"
        Dim OldSP As String = "0"
        Dim OldRL As String = "0"
        Dim OldRP As String = "0"
        Dim FRP As String = "0"

        Main.OpenFileDialog1.Filter = "Text Files (*.asc)|*.asc"
        ' Nothing to convert when the user cancels the dialog.
        If Main.OpenFileDialog1.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
            Return
        End If

        Dim inputFile As String = Main.OpenFileDialog1.FileName
        Main.ToolStripStatusLabel1.Text = inputFile
        Main.Refresh()

        Dim XPSFile As String = inputFile & ".xps"
        ' GetName drops the directory, so the log path is relative to the current directory.
        Dim Logfile As String = FileIO.FileSystem.GetName(XPSFile) & ".log"

        Using WrtLogfile As System.IO.StreamWriter = My.Computer.FileSystem.OpenTextFileWriter(Logfile, False)
            WrtLogfile.WriteLine("# Import file " & inputFile & " at " & stime)
            WrtLogfile.WriteLine()

            Using outfile As System.IO.StreamWriter = My.Computer.FileSystem.OpenTextFileWriter(XPSFile, False)
                outfile.WriteLine("H00 SPS format version number SPS 2.1;")
                Main.ToolStripStatusLabel3.Text = XPSFile
                Main.Refresh()

                ' A single reader, disposed even if a malformed line throws.
                Using fileReader As System.IO.StreamReader = My.Computer.FileSystem.OpenTextFileReader(inputFile)
                    Dim line As String = fileReader.ReadLine()
                    While line IsNot Nothing
                        ' Skip header lines and blank lines (a blank line has no columns to index).
                        If Not line.Contains("#") AndAlso line.Trim().Length > 0 Then
                            Readcnt += 1
                            ' With the spaces removed, splitting on "." leaves each integer part
                            ' prefixed by the previous column's decimals; TrimStart("0"c) strips
                            ' that prefix when the decimals are all zeros, as in the sample data.
                            Dim vals() As String = line.Replace(" ", "").Split("."c)
                            ChCnt += 1
                            FFID = vals(0)
                            Dim SL As String = vals(1).TrimStart("0"c)
                            Dim SP As String = vals(2).TrimStart("0"c)
                            Dim RL As String = vals(3).TrimStart("0"c)
                            Dim RP As String = vals(4).TrimStart("0"c)

                            If CDbl(OldFFID) = 0 Then
                                ' First data line: seed the previous-line state.
                                OldFFID = FFID
                                OldSL = SL
                                OldSP = SP
                                OldRL = RL
                                OldRP = RP
                            End If

                            If FFID <> OldFFID Then
                                ' New FFID: close the previous segment and restart channel numbering.
                                TRcnt += WriteSegment(outfile, OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, FFID)
                                OldFFID = FFID
                                OldSL = SL
                                OldSP = SP
                                OldRL = RL
                                OldRP = RP
                                ChCnt = 1
                                FRP = "0"
                            ElseIf SL <> OldSL OrElse SP <> OldSP Then
                                ' Same FFID but a different shot line/point: reported, nothing is written.
                                MsgBox("Duplicate FFID is not supported by SEG XPS files. FFID " & OldFFID & " SP " & OldSL & OldSP & " SP " & SL & SP)
                                OldSL = SL
                                OldSP = SP
                                OldRL = RL
                                OldRP = RP
                                FRP = "0"
                            ElseIf RL <> OldRL Then
                                ' New receiver line: close the previous segment.
                                TRcnt += WriteSegment(outfile, OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, FFID)
                                OldRL = RL
                                OldRP = RP
                                FRP = "0"
                            ElseIf CDbl(RP) = CDbl(OldRP) + 1 Then
                                ' Consecutive receiver point: extend the run, remembering where it began.
                                If CDbl(FRP) = 0 Then
                                    FRP = CStr(CDbl(RP) - 1)
                                End If
                                OldRP = RP
                            ElseIf RP <> OldRP Then
                                ' Receiver gap: close the run that ended on the previous line.
                                TRcnt += WriteSegment(outfile, OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, FFID)
                                OldRP = RP
                                FRP = "0"
                            ElseIf Readcnt <> 1 Then
                                ' Same receiver point again. The first data line also lands here
                                ' because the state was just seeded from it, hence the Readcnt test.
                                TRcnt += WriteSegment(outfile, OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt - 1, FFID)
                                OldRP = RP
                                FRP = "0"
                                WrtLogfile.WriteLine("Duplicate RP - FFID: " & FFID & " SL: " & SL & " SP" & SP & " RL: " & RL & " RP " & RP & " CH: " & ChCnt)
                                Duplicates += 1
                            End If
                        End If
                        line = fileReader.ReadLine()
                    End While
                End Using

                ' Flush the segment still being accumulated; skipped when the file held no data lines.
                If Readcnt > 0 Then
                    TRcnt += WriteSegment(outfile, OldFFID, OldSL, OldSP, OldRL, FRP, OldRP, ChCnt, FFID)
                End If
            End Using

            Dim etime As DateTime = DateTime.Now
            Main.ToolStripStatusLabel1.Text = XPSFile
            Main.ToolStripStatusLabel2.Text = "Traces " & TRcnt
            Main.ToolStripStatusLabel3.Text = Logfile

            ' Summary block at the end of the log.
            WrtLogfile.WriteLine()
            WrtLogfile.WriteLine()
            WrtLogfile.WriteLine("Total traces in " & Readcnt)
            WrtLogfile.WriteLine("Total traces out " & TRcnt)
            WrtLogfile.WriteLine("Duplicates traces found: " & Duplicates)
            WrtLogfile.WriteLine()
            WrtLogfile.WriteLine("# Export file " & inputFile & ".asc at " & etime)
            WrtLogfile.WriteLine()
            WrtLogfile.WriteLine("Total elapsed time " & DateDiff(DateInterval.Second, stime, etime) & " seconds")
        End Using
    End Sub

    ''' <summary>
    ''' Writes one "X" record for a finished receiver run and returns how many traces it covers.
    ''' </summary>
    ''' <param name="outfile">Open writer for the XPS file.</param>
    ''' <param name="ffid">FFID of the finished run.</param>
    ''' <param name="sl">Shot line of the finished run.</param>
    ''' <param name="sp">Shot point of the finished run.</param>
    ''' <param name="rl">Receiver line of the finished run.</param>
    ''' <param name="firstRp">First receiver point of the run, or "0" when the run is a single point.</param>
    ''' <param name="lastRp">Last receiver point of the run.</param>
    ''' <param name="lastChannel">Channel number of the run's last receiver point.</param>
    ''' <param name="currentFfid">FFID of the line being processed; only drives the status-bar text.</param>
    ''' <returns>Last channel minus first channel plus one.</returns>
    Private Function WriteSegment(outfile As System.IO.StreamWriter,
                                  ffid As String, sl As String, sp As String, rl As String,
                                  firstRp As String, lastRp As String,
                                  lastChannel As Long, currentFfid As String) As Long
        If CDbl(firstRp) = 0 Then
            ' No run start was recorded, so the run starts and ends on the same point.
            firstRp = lastRp
            If CDbl(firstRp) = 0 Then
                MsgBox("FRP = 0")
            End If
        End If

        Dim lastCh As String = CStr(lastChannel)
        ' The run spans (lastRp - firstRp) extra channels before the last one.
        Dim firstCh As String = CStr(CDbl(lastCh) - (CDbl(lastRp) - CDbl(firstRp)))

        If CDbl(currentFfid) Mod 10 = 0 Then
            Main.ToolStripStatusLabel2.Text = "Processing FFID: " & (CDbl(currentFfid) - 9) & "-" & currentFfid
            Main.Refresh()
        End If

        ' Fixed-width fields, right-aligned with PadLeft.
        outfile.Write("X")
        outfile.Write(ffid.PadLeft(14))
        outfile.Write("1")
        outfile.Write(sl.PadLeft(8))
        outfile.Write(".00")
        outfile.Write(sp.PadLeft(7))
        outfile.Write(".001")
        outfile.Write(firstCh.PadLeft(5))
        outfile.Write(lastCh.PadLeft(5))
        outfile.Write("1")
        outfile.Write(rl.PadLeft(7))
        outfile.Write(".00")
        outfile.Write(firstRp.PadLeft(7))
        outfile.Write(".00")
        outfile.Write(lastRp.PadLeft(7))
        outfile.WriteLine(".00-")

        Return CLng(CDbl(lastCh) - CDbl(firstCh) + 1)
    End Function

End Module