由于连续两次双引号""
,我正在努力解析CSV文件中的值。
以下是我从维基百科中提取的CSV字段示例:1997,Ford,E350,"Super, ""luxurious"" truck"
我试图找到不同的方法来解释它。
我一直得到的结果是:
" 1997"
"福特"
" E350"
"超"
""超级"
" ""豪华""卡车""
这是我的VB.Net功能。
Private Function splitCSV(ByVal sLine As String) As List(Of String)
Dim comA As Integer = -1, comB = -1, quotA = -1, quotB = -1, pos = 0
Dim parsed As New List(Of String)
Dim quote As String = """"
Dim comma As String = ","
Dim len As Integer = sLine.Length
Dim first As Boolean = True
comA = sLine.IndexOf(comma, pos) ' Find the next comma.
quotA = sLine.IndexOf(quote, pos) ' Find the next quotation mark.
' This if function works if there is only one field in the given row.
If comA < 0 Then
parsed.Add(False)
Return parsed
End If
While pos < len ' While not at end of the string
comB = sLine.IndexOf(comma, comA + 1) ' Find the second comma
quotB = sLine.IndexOf(quote, quotA + 1) ' Find the second quotation mark
' Looking for the actual second quote mark
' Skips over the double quotation marks.
If quotA > -1 And quotA < comB Then ' If the quotation mark is before the first comma
If Math.Abs(quotA - quotB).Equals(1) Then
Dim tempA As Integer = quotA
Dim tempB As Integer = quotB
' Looking for the actual second quote mark
' Skips over the double quotation marks.
While (Math.Abs(tempA - tempB).Equals(1))
tempA = tempB
If Not tempA.Equals(sLine.LastIndexOf(quote)) Then
tempB = sLine.IndexOf(quote, tempA + 1)
Else
tempA = tempB - 2
End If
End While
quotB = tempB
End If
If quotB < 0 Then ' If second quotation mark does not exist
parsed.Add(False) ' End the function and Return False
Return parsed
End If
parsed.Add(sLine.Substring(quotA + 1, quotB - quotA - 1)) ' Otherwise, add the substring of initial and end quotation marks.
quotA = quotB ' Give quotA the position of quotB
pos = quotB ' Mark the current position
ElseIf comA < comB Then
If first Then ' If it is the first comma in the line,
parsed.Add(sLine.Substring(pos, comA)) ' Parse the first field
first = False ' The future commas will not be considered as the first one.
End If
comB = sLine.IndexOf(comma, comA + 1) ' Find the second comma
If comB > comA Then ' If the second comma exists
parsed.Add(sLine.Substring(comA + 1, comB - comA - 1)) ' Add the substring of the first and second comma.
comA = comB ' Give comA the position of comB
pos = comB ' Mark the current position
End If
ElseIf len > 0 Then ' If the first comma does not exist, as long as sLine has something,
parsed.Add(sLine.Substring(pos + 1, len - pos - 1)) ' Return the substing of position to end of string.
pos = len ' Mark the position at the end to exit out of while loop
End If
End While
Return parsed ' Return parsed list of string
End Function
答案 0 :(得分:2)
TextFieldParser
非常适合这种事情,当然比滚动你自己更容易。这很容易测试:我将你的样本复制到一个文件,然后:
Imports Microsoft.VisualBasic.FileIO
...
Using parser = New TextFieldParser("C:\Temp\CSVPARSER.TXT")
parser.Delimiters = New String() {","}
parser.TextFieldType = FieldType.Delimited
parser.HasFieldsEnclosedInQuotes = True
While parser.EndOfData = False
data = parser.ReadFields
' use pipe to show column breaks:
Dim s = String.Join("|", data)
Console.WriteLine(s)
End While
End Using
在这种情况下, HasFieldsEnclosedInQuotes = True
很重要。结果:
1997 |福特| E350 |超级,&#34;豪华&#34;卡车
超级之后的逗号看起来不合适 - 很可能 - 但它是原始内部的引号:1997,Ford,E350,"Super, ""luxurious"" truck"
还有其他库/包也适用于各种CSV布局和格式。
答案 1 :(得分:0)
之前我必须解析这些类型的文件。这是我最后写的。基本上,您一次扫描一个字符的传入文本。如果它是一个引用,只需记下它,除非最后一个字符也是引用。如果您使用带引号的文本,则会忽略分隔符。
Protected Function FlexSplitLine(incoming As String, fieldDelimiter As String, quoteDelimiter As String) As String()
Dim rval As New List(Of String)
Dim index As Integer
Dim Word As New System.Text.StringBuilder
Dim inQuote As Boolean
Dim QuoteChar As Char
Dim CommaChar As Char
index = 0
If quoteDelimiter Is Nothing OrElse quoteDelimiter.Length = 0 Then
quoteDelimiter = """"
End If
If fieldDelimiter Is Nothing OrElse fieldDelimiter.Length = 0 Then
fieldDelimiter = ","
End If
QuoteChar = quoteDelimiter(0)
CommaChar = fieldDelimiter(0)
Do While index < incoming.Length
If incoming(index) = QuoteChar Then
If index < incoming.Length - 1 AndAlso incoming(index + 1) = QuoteChar Then
Word.Append(QuoteChar)
index += 1
Else
inQuote = Not inQuote
End If
ElseIf incoming(index) = CommaChar AndAlso Not inQuote Then
rval.Add(Word.ToString)
Word.Length = 0
Else
Word.Append(incoming(index))
End If
index += 1
Loop
If inQuote Then
Throw New IndexOutOfRangeException("Ran past the end of the line while looking for the ending quote character.")
End If
rval.Add(Word.ToString)
Return rval.ToArray
End Function