我有多个需要合并的文本文件。但我需要在合并前比较参考编号。
下面是文本文件
Text 1
001Email
002Video
003SocialNetwork
Text 2
001Gmail
001Yahoo
002Youtube
002Metacafe
003Facebook
003Myspace
Text 3
www.gmail.com001
www.yahoo.com001
www.youtube.com002
www.metacafe.com002
www.facebook.com003
www.myspace.com003
Output
001Email
001Gmail
www.gmail.com001
001Yahoo
www.yahoo.com001
002Video
002Youtube
www.youtube.com002
002Metacafe
www.metacafe.com002
003SocialNetwork
003Facebook
www.facebook.com003
003Myspace
www.myspace.com003
最快的处理方式是什么?是逐行读取并比较吗?这些文本文件各由数千行组成。
答案 0 :(得分:0)
这可能是一个过于复杂的解决方案,不过代码中的注释应该能解释清楚一切。输出与您给出的结果并不完全一致,因为我不确定各部分的顺序到底有多重要。这段代码先按参考编号对所有内容排序,然后按字符串的文本部分排序(不包括"www.")。
而您给出的结果是先按参考编号排序,再按文件的解析顺序,最后才按字母顺序排列(例如 002Metacafe 排在 002Video 之后)。如果这个顺序很重要,请告诉我。
Option Explicit On
Option Strict On
Imports System.IO
Imports System.Text.RegularExpressions
''//Form whose Load handler merges the three text files found on the desktop,
''//sorts the parsed lines and writes the result to the trace output.
Public Class Form1
    ''//Reads Text1.txt, Text2.txt and Text3.txt from the desktop, turns every
    ''//non-blank line into a Data item, sorts the items, traces them in sorted
    ''//order and finally closes the form.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ''//List of files to process
        Dim Desktop = My.Computer.FileSystem.SpecialDirectories.Desktop
        Dim Files As New List(Of String)
        Files.Add(Path.Combine(Desktop, "Text1.txt"))
        Files.Add(Path.Combine(Desktop, "Text2.txt"))
        Files.Add(Path.Combine(Desktop, "Text3.txt"))
        ''//Will hold the current line being read
        Dim Line As String
        ''//Holds our main collection of data
        Dim MyData As New List(Of Data)
        ''//Loop through each file
        For Each F In Files
            ''//Open the file for reading; both resources are disposed when the block ends
            Using FS As New FileStream(F, FileMode.Open, FileAccess.Read, FileShare.Read), SR As New StreamReader(FS)
                ''//Read each line
                Line = SR.ReadLine()
                Do While Line IsNot Nothing
                    ''//Skip blank lines (such as a trailing empty line): the Data
                    ''//constructor rejects empty input and would abort the whole merge
                    If Line.Trim().Length > 0 Then
                        ''//The data constructor handles parsing of the line
                        MyData.Add(New Data(Line))
                    End If
                    ''//Read next line
                    Line = SR.ReadLine()
                Loop
            End Using
        Next
        ''//Our data implements IComparable(Of Data) so we can just sort the list
        MyData.Sort()
        ''//Output our data
        For Each D In MyData
            Trace.WriteLine(D)
        Next
        Me.Close()
    End Sub
End Class
''//One parsed input line: a text portion plus a numeric reference that sits
''//either at the start of the line (e.g. "001Email") or at the end of it
''//(e.g. "www.gmail.com001"). Instances sort by reference number first and
''//then by the text portion, ignoring case and any "www." in the text.
Public Class Data
    Implements IComparable(Of Data)
    ''//Our RegEx pattern for looking for a string that either starts or ends with numbers
    Private Shared ReadOnly Pattern As String = "^(?<RefStart>\d+)?(?<Text>.*?)(?<RefEnd>\d+)?$"
    Public Text As String ''//The _text_ portion of the data
    Public Reference As String ''//The reference number stored as text
    Public ReferenceAtStart As Boolean ''//Whether the reference number was found at the start or end of the line

    ''//Numeric version of the reference number for sorting
    Public ReadOnly Property ReferenceAsNum() As Integer
        Get
            Return Integer.Parse(Me.Reference)
        End Get
    End Property

    ''//The text with every "www." removed, used for sorting
    Public ReadOnly Property TextComparable() As String
        Get
            Return Me.Text.Replace("www.", "")
        End Get
    End Property

    ''//Parses a single line into its reference number and text.
    ''//Throws ArgumentNullException when line is Nothing or empty, and
    ''//ArgumentException when the line does not match the pattern or carries
    ''//no reference number at either end.
    Public Sub New(ByVal line As String)
        ''//Sanity check
        If String.IsNullOrEmpty(line) Then Throw New ArgumentNullException("line")
        ''//Parse the line
        Dim M = Regex.Match(line, Pattern)
        ''//Regex.Match never returns Nothing; a failed match is reported through Success
        If Not M.Success Then Throw New ArgumentException("Line does not conform to expected pattern")
        Dim RefStart = M.Groups("RefStart")
        Dim RefEnd = M.Groups("RefEnd")
        If RefStart.Success Then
            ''//The number is at the beginning of the string
            Me.ReferenceAtStart = True
            Me.Reference = RefStart.Value
            ''//Trailing digits are not the reference here, so they stay part of the
            ''//text; otherwise ToString could not reproduce the original line
            Me.Text = M.Groups("Text").Value & RefEnd.Value
        ElseIf RefEnd.Success Then
            ''//Otherwise its at the end
            Me.ReferenceAtStart = False
            Me.Reference = RefEnd.Value
            Me.Text = M.Groups("Text").Value
        Else
            ''//No digits at either end: reject now instead of failing later while sorting
            Throw New ArgumentException("Line does not conform to expected pattern")
        End If
    End Sub

    ''//Orders by numeric reference, then by TextComparable ignoring case.
    ''//Any instance sorts after Nothing.
    Public Function CompareTo(ByVal other As Data) As Integer Implements System.IComparable(Of Data).CompareTo
        If other Is Nothing Then Return 1
        ''//Compare the reference numbers first
        Dim Ret = Me.ReferenceAsNum.CompareTo(other.ReferenceAsNum)
        ''//If they are the same then compare the strings
        If Ret = 0 Then Ret = String.Compare(Me.TextComparable, other.TextComparable, StringComparison.InvariantCultureIgnoreCase)
        Return Ret
    End Function

    ''//Reproduce the original string
    Public Overrides Function ToString() As String
        If Me.ReferenceAtStart Then
            Return String.Format("{0}{1}", Me.Reference, Me.Text)
        Else
            Return String.Format("{1}{0}", Me.Reference, Me.Text)
        End If
    End Function
End Class