使用VB.NET比较并合并多个文本文件

时间:2011-06-23 13:54:24

标签: vb.net merge text-files streamreader

我有多个需要合并的文本文件。但我需要在合并前比较参考编号。

下面是文本文件

Text 1    
001Email
002Video
003SocialNetwork

Text 2
001Gmail
001Yahoo
002Youtube
002Metacafe
003Facebook
003Myspace

Text 3
www.gmail.com001
www.yahoo.com001
www.youtube.com002
www.metacafe.com002
www.facebook.com003
www.myspace.com003


Output


001Email
001Gmail
www.gmail.com001
001Yahoo
www.yahoo.com001
002Video
002Youtube
www.youtube.com002
002Metacafe
www.metacafe.com002
003SocialNetwork
003Facebook
www.facebook.com003
003Myspace
www.myspace.com003

最快的处理方式是什么?是逐行读取并比较吗?这些文本文件各由数千行组成。

1 个答案:

答案 0 :(得分:0)

这可能是一个过于复杂的解决方案。代码中的注释应该能够解释清楚一切。输出与您给出的结果不完全一致,因为我不确定顺序到底有多重要。它首先按参考编号对所有内容排序,然后按字符串的文本部分排序(不包括www.)。您发布的结果则是先按参考编号排序,再按文件解析顺序,最后才按字母顺序排列(例如002Metacafe排在002Video之后)。如果这一点很重要,请告诉我。

Option Explicit On
Option Strict On

Imports System.IO
Imports System.Text.RegularExpressions

Public Class Form1
    ''//Reads every line of Text1.txt, Text2.txt and Text3.txt from the desktop,
    ''//parses each one into a Data item, sorts the combined list and writes the
    ''//sorted items to the trace listeners. The form closes itself when done.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ''//List of files to process
        Dim Desktop As String = My.Computer.FileSystem.SpecialDirectories.Desktop
        Dim Files As New List(Of String)
        Files.Add(Path.Combine(Desktop, "Text1.txt"))
        Files.Add(Path.Combine(Desktop, "Text2.txt"))
        Files.Add(Path.Combine(Desktop, "Text3.txt"))

        ''//Will hold the current line being read
        Dim Line As String

        ''//Holds our main collection of data
        Dim MyData As New List(Of Data)

        ''//Loop through each file
        For Each F In Files
            ''//Open the file for reading; both objects are disposed when the Using block ends
            Using FS As New FileStream(F, FileMode.Open, FileAccess.Read, FileShare.Read),
                  SR As New StreamReader(FS)

                ''//Read each line until ReadLine signals end of file with Nothing
                Line = SR.ReadLine()
                Do While Line IsNot Nothing
                    ''//Surrounding whitespace would hide a reference number at the end
                    ''//of the line from the parser, so strip it first
                    Line = Line.Trim()

                    ''//The Data constructor rejects empty strings, so a blank line
                    ''//would otherwise abort the whole run; skip those
                    If Line.Length > 0 Then
                        ''//The data constructor handles parsing of the line
                        MyData.Add(New Data(Line))
                    End If

                    ''//Read next line
                    Line = SR.ReadLine()
                Loop

            End Using
        Next

        ''//Our data implements IComparable(Of Data) so we can just sort the list
        MyData.Sort()

        ''//Output our data (Trace.WriteLine calls ToString on each item)
        For Each D In MyData
            Trace.WriteLine(D)
        Next

        Me.Close()
    End Sub
End Class
''//One parsed input line: a numeric reference found at the start or at the end
''//of the line, plus the remaining text. Instances sort by reference number
''//first and then by text (ignoring case and any "www." substring).
Public Class Data
    Implements IComparable(Of Data)

    ''//Our RegEx pattern for looking for a string that either starts or ends with numbers
    Private Shared ReadOnly Pattern As String = "^(?<RefStart>\d+)?(?<Text>.*?)(?<RefEnd>\d+)?$"
    ''//Built once and shared so the pattern is not re-resolved for every line
    Private Shared ReadOnly LineRegex As New Regex(Pattern)

    Public Text As String                      ''//The _text_ portion of the data
    Public Reference As String                 ''//The reference number stored as text
    Public ReferenceAtStart As Boolean         ''//Whether the reference number was found at the start or end of the line

    ''//Numeric version of the reference number for sorting.
    ''//Parsed from Reference on every call because Reference is a public, writable field.
    Public ReadOnly Property ReferenceAsNum() As Integer
        Get
            Return Integer.Parse(Me.Reference)
        End Get
    End Property

    ''//Text with every "www." occurrence removed, used only for sorting
    Public ReadOnly Property TextComparable() As String
        Get
            Return Me.Text.Replace("www.", "")
        End Get
    End Property

    ''//Parses a single line into Reference, Text and ReferenceAtStart.
    ''//  line: the raw line; must not be Nothing or empty.
    ''//Throws ArgumentNullException when line is Nothing or empty, and
    ''//ArgumentException when the line has no usable reference number.
    Public Sub New(ByVal line As String)
        ''//Sanity check
        If String.IsNullOrEmpty(line) Then Throw New ArgumentNullException("line")

        ''//Parse the line. Regex.Match never returns Nothing; a failed match is
        ''//reported through Success (e.g. a string with an embedded line break)
        Dim M = LineRegex.Match(line)
        If Not M.Success Then Throw New ArgumentException("Line does not conform to expected pattern")

        Dim RefStart = M.Groups("RefStart")
        Dim RefEnd = M.Groups("RefEnd")

        If RefStart.Success Then
            ''//The number is at the beginning of the string. Any digits at the end
            ''//belong to the text in that case, so put them back to keep
            ''//ToString() faithful to the input
            Me.ReferenceAtStart = True
            Me.Reference = RefStart.Value
            Me.Text = M.Groups("Text").Value & RefEnd.Value
        ElseIf RefEnd.Success Then
            ''//Otherwise its at the end
            Me.ReferenceAtStart = False
            Me.Reference = RefEnd.Value
            Me.Text = M.Groups("Text").Value
        Else
            ''//No digits at either end: fail here instead of later inside a sort
            Throw New ArgumentException("Line does not conform to expected pattern")
        End If

        ''//\d accepts any Unicode decimal digit and any length, so make sure the
        ''//reference really is something ReferenceAsNum can parse
        Dim Parsed As Integer
        If Not Integer.TryParse(Me.Reference, Parsed) Then
            Throw New ArgumentException("Reference number is not a valid 32-bit integer")
        End If
    End Sub

    ''//Orders by reference number, then by TextComparable (invariant culture, ignoring case).
    ''//Returns a positive value when other is Nothing, as IComparable(Of T) expects.
    Public Function CompareTo(ByVal other As Data) As Integer Implements System.IComparable(Of Data).CompareTo
        ''//Any instance sorts after Nothing
        If other Is Nothing Then Return 1

        ''//Compare the reference numbers first
        Dim Ret = Me.ReferenceAsNum.CompareTo(other.ReferenceAsNum)
        ''//If they are the same then compare the strings
        If Ret = 0 Then Ret = String.Compare(Me.TextComparable, other.TextComparable, StringComparison.InvariantCultureIgnoreCase)

        Return Ret
    End Function

    ''//Reproduces the original string: reference before the text when it was
    ''//found at the start of the line, after the text otherwise
    Public Overrides Function ToString() As String
        If Me.ReferenceAtStart Then
            Return String.Format("{0}{1}", Me.Reference, Me.Text)
        Else
            Return String.Format("{1}{0}", Me.Reference, Me.Text)
        End If
    End Function
End Class