Dim re As New Regex("((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")((?<rowbreak>,\r\n|\r\n|\n|$)|,)")
3700,Collin,Franc,,franc@domain.com,Collins,Francine,,franc@domain.com,"Enel North America, Inc.",One T Drive,Suite 220,MyCity,MA,77774,1,traci@domain.com,,,,,3700,
3701,Steur,Larry,,larry@domain.com,Steur,Larry,,larry@domain.com,"St. Petersburg Corp, Inc.",10490 Gady Blvd,,MyCity,FL,33772,1,victor@domain.com,,,,,3701
3705,Giger,Tina,CFO,tina@mydomain.com,Giger,Tina,CFO,tina@mydomain.com,Net Technologies,23 Calab Rd,Suite 202,Calabas,CA,77777,1,Mark@mydomain.com,,,,,3705,
我使用的RegEx不起作用,如果有空列,它会将下一列“滑动”到它的位置。我该怎么做以允许以下内容为每一行创建五列?
1,2,3,4,5
Test,,,Test,
Test,,Test,,
这是我的完整代码:
<%@ Page Language="VB" MasterPageFile="~/_Common/MasterPage.master" %>
<%@ Import Namespace="System.Data" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Text.RegularExpressions" %>
<%@ Import Namespace="System.Xml" %>
<script runat="server">
Private Sub Page_Load(ByVal sender As Object, ByVal e As System.EventArgs)
' call the parser
Dim dt As DataTable = ParseCSVFile(Server.MapPath("Documents/Test.csv"))
' bind the resulting DataTable to a DataGrid Web Control
GridView1.DataSource = dt
GridView1.DataBind()
End Sub
Public Function ParseCSV(ByVal inputString As String) As DataTable
Dim dt As New DataTable()
' declare the Regular Expression that will match versus the input string
Dim re As New Regex("((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")((?<rowbreak>,\r\n|\r\n|\n|$)|,)")
Dim colArray As New ArrayList()
Dim rowArray As New ArrayList()
Dim colCount As Integer = 0
Dim maxColCount As Integer = 0
Dim rowbreak As String = ""
Dim field As String = ""
Dim mc As MatchCollection = re.Matches(inputString)
For Each m As Match In mc
' retrieve the field and replace two double-quotes with a single double-quote
field = m.Result("${field}").Replace("""""", """")
rowbreak = m.Result("${rowbreak}")
If field.Length > 0 Then
colArray.Add(field)
colCount += 1
End If
If rowbreak.Length > 0 Then
' add the column array to the row Array List
rowArray.Add(colArray.ToArray())
' create a new Array List to hold the field values
colArray = New ArrayList()
If colCount > maxColCount Then
maxColCount = colCount
End If
colCount = 0
End If
Next
If rowbreak.Length = 0 Then
' this is executed when the last line doesn't
' end with a line break
rowArray.Add(colArray.ToArray())
If colCount > maxColCount Then
maxColCount = colCount
End If
End If
' create the columns for the table
For i As Integer = 0 To maxColCount - 1
dt.Columns.Add([String].Format("col{0:000}", i))
Next
' convert the row Array List into an Array object for easier access
Dim ra As Array = rowArray.ToArray()
For i As Integer = 0 To ra.Length - 1
' create a new DataRow
Dim dr As DataRow = dt.NewRow()
' convert the column Array List into an Array object for easier access
Dim ca As Array = DirectCast((ra.GetValue(i)), Array)
' add each field into the new DataRow
For j As Integer = 0 To ca.Length - 1
dr(j) = ca.GetValue(j)
Next
' add the new DataRow to the DataTable
dt.Rows.Add(dr)
Next
' in case no data was parsed, create a single column
If dt.Columns.Count = 0 Then
dt.Columns.Add("NoData")
End If
Return dt
End Function
Public Function ParseCSVFile(ByVal path As String) As DataTable
Dim inputString As String = ""
' check that the file exists before opening it
If File.Exists(path) Then
Dim sr As New StreamReader(path)
inputString = sr.ReadToEnd()
sr.Close()
End If
Return ParseCSV(inputString)
End Function
</script>
<asp:Content ID="Content1" ContentPlaceHolderID="head" Runat="Server">
</asp:Content>
<asp:Content ID="Content2" ContentPlaceHolderID="ContentPlaceHolder1" Runat="Server">
<asp:FileUpload ID="FileUpload1" runat="server" />
<asp:Button ID="btnUpload" runat="server" Text="Upload" />
<asp:GridView ID="GridView1" runat="server">
</asp:GridView>
</asp:Content>
我实际上更喜欢用Linq做这个,但我找不到任何使用带引号值的CSV文件的示例(对于值本身包含逗号的值)。
** 更新 **
我将RegEx更改为此,这让我更接近:
Dim re As New Regex("(?<field>,)|((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")(,|(?<rowbreak>\r\n|\n|$))")
现在它将所有数据放在正确的列中,但空列中只有逗号
答案 0 :(得分:1)
为什么要使用正则表达式?你有一个非常简单的解析工作 - 使用逗号作为分隔符,除非它们在引号内。因此,您可以逐个字符地迭代每一行,在您点击oppening报价时设置标记,并在您点击结束报价时取消设置。而“内部引用”则忽略逗号。 (我放弃尝试使用正则表达式执行此操作)...试试这个:
private DataTable GetDataTableFromCsv(string csvContent)
{
DataTable dt = new DataTable();
string all = csvContent;
string[] lines = all.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
foreach (String s in lines)
{
bool isInsideQuote = false;
List<string> values = new List<string>();
string accumulator = "";
for (int j = 0; j < s.Length; j++)// (char c in s)
{
char c = s[j];
if (c == '"')
{
if (!isInsideQuote)
{
isInsideQuote = true;
}
else
{
isInsideQuote = false;
}
accumulator += "\"";
}
else if (c == ',' && !isInsideQuote)
{
values.Add(accumulator);
accumulator = ""; //reset
}
else if (j == s.Length - 1)
{
accumulator += c;
values.Add(accumulator);
accumulator = "";
}
else
{
accumulator += c;
}
}
//first time, create relevant columns
if (dt.Columns.Count == 0)
{
foreach (string colExample in values)
{
dt.Columns.Add();
}
//list of strings now contains an example.
}
DataRow dr = dt.NewRow();
for (int i = 0; i < values.Count; i++)
{
dr[i] = values[i];
}
dt.Rows.Add(dr);
}
return dt;
}
答案 1 :(得分:0)
尝试这样的事情:
"(?<field1>.*),(?<field2>.*),(?<field3>.*),(?<field4>.*),(?<field5>.*)"