CSV重复字段名称

时间:2013-04-09 05:50:12

标签: vb.net csv ssis

我有一个从核心系统中提取的CSV文件。最近对系统的开发更改使CSV文件数据包含重复的列/字段名称。

因此字段不止一次出现。

  • 创作者名字
  • 创作者姓氏
  • 创作者薪资编号
  • 收入
  • 联系
  • 销售团队
  • 频道

然后我们通过SSIS / DTS包上传这些数据。只要存在重复的字段名称,该包实际上就无法运行。因此,我们需要删除或重命名重复的字段。

所以我想创建一个C#或VB脚本:当某个字段名称出现的次数大于1时,重命名重复的字段名称,在其后附加数字后缀(例如1)。

有没有一种简单的方法可以通过Microsoft.VisualBasic.FileIO.TextFieldParser或类似的东西来做到这一点?不确定是否有人遇到类似的问题。

我添加了一个脚本任务,它运行下面的Visual Basic 2008代码(仍在完善中):

Public Sub Main()
    ' Purpose: copy the CSV file named by the SSIS variable "fileName" to
    ' "<name>_amended.csv", renaming duplicate column names in the header
    ' row so that every non-empty column name is unique.
    '
    ' Only the header line is rewritten; every data line is copied through
    ' unchanged, so no column is dropped and data values that contain
    ' commas inside quotes are not damaged by re-splitting.
    ' Blank lines are not written to the output.

    'Check if file exist
    Dim filename As String = CStr(Dts.Variables.Item("fileName").Value)
    If Not System.IO.File.Exists(filename) Then
        ' Nothing to amend. Dts.TaskResult is deliberately left untouched
        ' here, exactly as before.
        Exit Sub
    End If

    ' Output path: the input path minus its last four characters
    ' (presumably the ".csv" extension - confirm against callers) plus
    ' "_amended.csv".
    Dim amendedName As String = Left(filename, Len(filename) - Len(Right(filename, 4))) + "_amended.csv"

    ' Using blocks guarantee both files are closed even if an exception
    ' is thrown part-way through the copy.
    Using reader As New System.IO.StreamReader(filename)
        Using writer As New System.IO.StreamWriter(amendedName)
            Dim isHeader As Boolean = True
            ' ReadLine treats CR, LF and CRLF all as line terminators, so
            ' no stray CR/LF characters end up inside field values.
            Dim line As String = reader.ReadLine()

            While line IsNot Nothing
                If isHeader Then
                    ' The first physical line is the header row.
                    line = RenameDuplicateFields(line)
                    isHeader = False
                End If

                If line.Length > 0 Then
                    writer.WriteLine(line)
                End If

                line = reader.ReadLine()
            End While
        End Using
    End Using

    Dts.TaskResult = ScriptResults.Success
End Sub

' Returns headerLine with every repeated, non-empty column name made unique
' by appending " 1", " 2", ... to the second and later occurrences.
' The first occurrence keeps its name and empty names are left alone.
' Names are compared case-sensitively. The header is split on plain commas,
' so a quoted column name that itself contains a comma is not supported.
Private Function RenameDuplicateFields(ByVal headerLine As String) As String
    Dim names() As String = headerLine.Split(","c)

    ' Maps every name already emitted to the last numeric suffix handed out
    ' for it (0 when no suffix has been used yet).
    Dim used As New System.Collections.Generic.Dictionary(Of String, Integer)

    For i As Integer = 0 To names.Length - 1
        Dim original As String = names(i)
        If String.IsNullOrEmpty(original) Then
            Continue For
        End If

        Dim suffix As Integer = 0
        If used.ContainsKey(original) Then
            suffix = used(original)
        End If

        ' Keep incrementing until the candidate collides with nothing that
        ' has already been written, including names such as "A 1" that
        ' were present in the source header itself.
        Dim candidate As String = original
        While used.ContainsKey(candidate)
            suffix += 1
            candidate = original & " " & CStr(suffix)
        End While

        used(original) = suffix
        If Not used.ContainsKey(candidate) Then
            used.Add(candidate, 0)
        End If

        names(i) = candidate
    Next

    Return String.Join(",", names)
End Function

0 个答案:

没有答案