我有一个从核心系统中提取的CSV文件。最近对系统的开发更改使CSV文件数据包含重复的列/字段名称。
因此字段不止一次出现。
然后我们通过SSIS / DTS包上传这些数据。当存在重复的字段名称时,该包实际上无法运行或正常工作。因此,我们需要删除或重命名重复的字段。
所以我想创建一个C#或VB脚本:当某个字段名称出现的次数大于1时,重命名重复的字段名称,在其后附加一个数字(例如1)。
有没有一种简单的方法可以通过Microsoft.VisualBasic.FileIO.TextFieldParser或类似的东西来做到这一点?不确定是否有人遇到类似的问题。
我添加了一个脚本任务,它运行下面的Visual Basic 2008代码(仍在开发中,WIP):
''' <summary>
''' Script-task entry point. Reads the CSV file named by the "fileName"
''' package variable, makes repeated header names unique by appending a
''' numeric suffix, and writes the result to "[input without last 4 chars]_amended.csv".
''' Data rows are copied through unchanged; empty lines are skipped.
''' </summary>
Public Sub Main()
    Dim filename As String = CStr(Dts.Variables.Item("fileName").Value)

    ' Nothing to do when the input file is missing.
    ' NOTE(review): TaskResult is left untouched on this path, exactly as
    ' before - confirm that this is the intended outcome for a missing file.
    If Not System.IO.File.Exists(filename) Then
        Exit Sub
    End If

    ' ReadAllLines copes with CRLF, LF and CR line endings and closes the
    ' file itself, so no stray line-feed characters remain in the rows.
    Dim lines() As String = System.IO.File.ReadAllLines(filename)

    ' Same naming rule as before: drop the last 4 characters (".csv") and
    ' append the "_amended.csv" suffix.
    Dim amendedName As String = Left(filename, Len(filename) - Len(Right(filename, 4))) + "_amended.csv"

    ' Using guarantees the writer is flushed and closed even if a write fails.
    Using sw As New System.IO.StreamWriter(amendedName)
        For rowIndex As Integer = 0 To lines.Length - 1
            Dim currentLine As String = lines(rowIndex)

            If rowIndex = 0 Then
                ' Row 0 holds the field names; only this row is rewritten.
                ' NOTE: the header is split on plain commas, as before, so
                ' quoted header names that contain commas are not supported.
                currentLine = String.Join(",", MakeHeaderNamesUnique(currentLine.Split(","c)))
            End If

            ' Empty lines (for example a trailing newline) are not written.
            If currentLine <> "" Then
                sw.WriteLine(currentLine)
            End If
        Next
    End Using

    Dts.TaskResult = ScriptResults.Success
End Sub

''' <summary>
''' Returns a copy of <paramref name="headers"/> in which every repeated,
''' non-empty name after its first occurrence is renamed to "name N", where N
''' is the smallest positive number that yields a name not already used.
''' Comparison is case-sensitive. Empty names are left untouched.
''' </summary>
''' <param name="headers">Field names from the first row of the file.</param>
''' <returns>A new array of the same length with unique non-empty names.</returns>
Private Function MakeHeaderNamesUnique(ByVal headers() As String) As String()
    Dim result(headers.Length - 1) As String

    ' Maps every name already emitted to the last numeric suffix handed out for it.
    Dim lastSuffix As New System.Collections.Generic.Dictionary(Of String, Integer)

    For index As Integer = 0 To headers.Length - 1
        Dim name As String = headers(index)
        result(index) = name

        If String.IsNullOrEmpty(name) Then
            Continue For
        End If

        If Not lastSuffix.ContainsKey(name) Then
            ' First occurrence keeps its original name.
            lastSuffix.Add(name, 0)
            Continue For
        End If

        ' Duplicate: keep increasing the suffix until the candidate is unused,
        ' so a rename can never collide with another column.
        Dim suffix As Integer = lastSuffix(name)
        Dim candidate As String
        Do
            suffix += 1
            candidate = name + " " + CStr(suffix)
        Loop While lastSuffix.ContainsKey(candidate)

        lastSuffix(name) = suffix
        lastSuffix.Add(candidate, 0)
        result(index) = candidate
    Next

    Return result
End Function