我有一个读取CSV文件并构建数据表的函数。我不会声称这是我自己的因为它不是。我不记得我从哪里得到它,但它是来自几个不同来源的组合。
我的功能正常工作,直到我需要处理一些在值中带有“,”逗号的CSV。任何人都可以帮我解决这个问题吗?
非常感谢..
示例CSV文件
FirstName, LastName, Comment, "address, just city",
John, Smith, "you are very good, but ugly", London,
Britney, Spear, "I am a singer, and beautiful", New York,
我的功能
Public Function BuildDataTable() As DataTable
Dim myTable As DataTable = New DataTable("MyTable")
Dim i As Integer
Dim myRow As DataRow
Dim fieldValues As String()
Dim myReader As IO.StreamReader
Dim csv2xml As New csv2xml
Try
'Open file and read first line to determine how many fields there are.
myReader = File.OpenText(_fileFullPath)
fieldValues = myReader.ReadLine().Split(_seperator)
'Create data columns accordingly
If _hasheader = False Then
For i = 0 To fieldValues.Length() - 1
myTable.Columns.Add(New DataColumn("Column(" & i & ")"))
Next
Else
'if the file has header, take the first row as header for datatable
For i = 0 To fieldValues.Length() - 1
myTable.Columns.Add(New DataColumn(fieldValues(i).Replace(" ", "")))
Next
End If
'Adding the first line of data to data table
myRow = myTable.NewRow
'if the csv file has not got a column header. defined by radio button list on first page by user
'if csv file has header, then not need to read the first line
If _hasheader = False Then
For i = 0 To fieldValues.Length() - 1
myRow.Item(i) = fieldValues(i).ToString
Next
myTable.Rows.Add(myRow)
End If
'Now reading the rest of the data to data table
While myReader.Peek() <> -1
fieldValues = myReader.ReadLine().Split(_seperator)
myRow = myTable.NewRow
For i = 0 To fieldValues.Length() - 1
myRow.Item(i) = fieldValues(i).Trim.ToString
Next
'check if there are empty rows in csv, ignore empty rows
If Not csv2xml.AreAllColumnsEmpty(myRow) = True Then
myTable.Rows.Add(myRow)
End If
End While
Catch ex As Exception
'MsgBox("Error building datatable: " & ex.Message)
Dim oError As ErrorLog = New ErrorLog
oError.LogError(_strWebsiteName, _
loginID, _
ex.Source.ToString, _
ex.Message.ToString, _
, _
ex.StackTrace.ToString)
oError = Nothing
Return New DataTable("Empty")
'Server.Transfer(CustomErrorPage)
Finally
csv2xml = Nothing
myRow = Nothing
End Try
myReader.Close()
Return myTable
End Function
答案 0 :(得分:6)
只需使用VB TextFieldParser类,并将HasFieldsEnclosedInQuotes属性设置为True:
答案 1 :(得分:2)
您的问题是,您似乎只是通过将逗号分隔每个行字符串作为分隔符来解析列。该方法不适用于所有CSV文件,因为如果它是一个包含逗号的字符串,如您所示,整个字段值将被引号括起来。所以你需要首先用引号分割,然后用逗号分隔剩下的内容,并从字段的值中删除引号,这是一个实质上更复杂的算法。你可以自己编写类似的东西,或者使用正则表达式来做,但那时,我认为你最好只使用ADO来加载CSV文件:
Public Function LoadCsvFile(filePath As String, hasHeader As Boolean) As DataTable
Dim folderPath As String = Path.GetDirectoryName(filePath)
Dim fileName As String = Path.GetFileName(filePath)
Dim hdr As String = Nothing
If hasHeader Then
hdr = "Yes"
Else
hdr = "No"
End If
Dim connectionString As String = String.Format("Driver={Microsoft Text Driver (*.txt; *.csv)};Dbq={0};Extended Properties=""Text;HDR={1};FMT=Delimited""", folderPath, hdr)
Dim connection As New OdbcConnection(connectionString)
Dim adapter As New OdbcDataAdapter("SELECT * FROM [" + fileName + "]", connection)
Dim table As New DataTable()
adapter.Fill(table)
Return table
End Function