我正在使用这个库:CSV Reader,但问题在于 .csv
文件的格式不规范。
示例:
,UDEQPT,,PROMIS,,,,,,,,,,,,,,,,,,,,,,,,,10:20:15,27-Dec-2015,
,UDEQPT,,DELAY,,,,,,,,,am24134_1_drift,am24134.1_drift,229,19,,,3176.00,164.78,,,,,,5, 1.00,1,06:16:16,15-Jun-2016,,,,,,,
,UDEQPT,,DELAY,,,,,,,,,am24134_1_drift,am24134.1_drift,345,25,,,131.68,216.71,,,,,,6, 1.00,1,06:28:23,15-Jun-2016,,,,,,,
,UDEQPT,,DELAY,,,,,,,,,am24134_1_drift,am24134.1_drift,346,25,,,170.18,210.93,,,,,,7, 1.00,1,06:31:18,15-Jun-2016,,,,,,,
,UDEQPT,,DELAY,,,,,,,,,am24134_1_drift,am24134.1_drift,376,27,,,295.83,212.99,,,,,,8, 1.00,1,06:38:47,15-Jun-2016,,,,,,,
,UDEQPT,,ENDLOT,,,,def,def,def,def,,am24134_1_drift,am24134.1_drift,385,27,,,1214.13,213.82, 3.48, 3.11, 1.64, 25.96,1,8, 1.00,1,06:59:46,15-Jun-2016,,4395.91,1465945186,,def,0,1,385, 3.48,357,385, 92.9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
固定的列数为 54,因此如果某一行的字段数少于这个固定列数,就会出错。在上面的示例中,第一行的字段只到索引 30 为止。应该如何正确处理这个问题?
这是我的代码:
// Opens the file at e.FullPath, reads it with a header-less CachedCsvReader using
// an explicit column schema, and prints every field of every record to the console.
using (var path = File.OpenRead(e.FullPath))
using (var csv = new CachedCsvReader(new StreamReader(path), false))
{
    // Explicit schema: the file has no header row (hasHeaders == false above),
    // so column names and types are supplied here in file order.
    csv.Columns = new List<Column>
    {
        new Column { Name = "Delay_Code", Type = typeof(string) },
        new Column { Name = "PROMIS_Code", Type = typeof(string) },
        new Column { Name = "Tester_Mode", Type = typeof(string) },
        new Column { Name = "Event_Name", Type = typeof(string) },
        new Column { Name = "Test_Program", Type = typeof(string) },
        new Column { Name = "Temperature", Type = typeof(int?) },
        new Column { Name = "Lot_Size", Type = typeof(int?) },
        new Column { Name = "Part_Name", Type = typeof(string) },
        new Column { Name = "Procedure_Name", Type = typeof(string) },
        new Column { Name = "Handler_Id", Type = typeof(string) },
        new Column { Name = "Perf_Board", Type = typeof(string) },
        new Column { Name = "Sys_Part_Type", Type = typeof(string) },
        new Column { Name = "Lot_Id", Type = typeof(string) },
        new Column { Name = "Stage", Type = typeof(string) },
        new Column { Name = "Parts_Tested", Type = typeof(int?) },
        new Column { Name = "Parts_Failed", Type = typeof(int?) },
        new Column { Name = "Reprobes", Type = typeof(int?) },
        new Column { Name = "Successful_Reprobes", Type = typeof(int?) },
        new Column { Name = "Delay_Time", Type = typeof(float?) },
        new Column { Name = "UPH", Type = typeof(float?) },
        new Column { Name = "Test_Time_Pass", Type = typeof(float?) },
        new Column { Name = "Test_Time_Fail", Type = typeof(float?) },
        new Column { Name = "Avg_Index_Time", Type = typeof(float?) },
        new Column { Name = "Delays_30Sec_Avg", Type = typeof(float?) },
        new Column { Name = "Delays_30Sec_Count", Type = typeof(int?) },
        new Column { Name = "Delays_Count", Type = typeof(int?) },
        new Column { Name = "Avg_Num_Sites", Type = typeof(float?) },
        new Column { Name = "Active_Sites", Type = typeof(float?) },
        new Column { Name = "Hour_Min_Sec", Type = typeof(string) },
        new Column { Name = "Day_Month_Year", Type = typeof(string) },
        new Column { Name = "User_Name", Type = typeof(string) },
        new Column { Name = "Delays_Total_Duration", Type = typeof(float?) },
        new Column { Name = "Duration_Since_Last_End_Lot", Type = typeof(float?) },
        new Column { Name = "Start_Lot_Time_Data_Entry", Type = typeof(float?) },
        new Column { Name = "Employee_Id", Type = typeof(string) },
        new Column { Name = "Valid_Flag", Type = typeof(int?) },
        new Column { Name = "Sample_Rate", Type = typeof(int?) },
        new Column { Name = "Handler_Cycles", Type = typeof(int?) },
        new Column { Name = "Site_1_Only_Pass_Only_Avg_Test_Time", Type = typeof(float?) },
        new Column { Name = "Site_1_Only_Pass_Only_Count", Type = typeof(int?) },
        new Column { Name = "Site_1_Count", Type = typeof(int?) },
        new Column { Name = "Site_1_Yield", Type = typeof(float?) },
        new Column { Name = "Site_2_Only_Pass_Only_Avg_Test_Time", Type = typeof(float?) },
        new Column { Name = "Site_2_Only_Pass_Only_Count", Type = typeof(int?) },
        new Column { Name = "Site_2_Count", Type = typeof(int?) },
        new Column { Name = "Site_2_Yield", Type = typeof(float?) },
        new Column { Name = "Site_3_Only_Pass_Only_Avg_Test_Time", Type = typeof(float?) },
        new Column { Name = "Site_3_Only_Pass_Only_Count", Type = typeof(int?) },
        new Column { Name = "Site_3_Count", Type = typeof(int?) },
        new Column { Name = "Site_3_Yield", Type = typeof(float?) },
        new Column { Name = "Site_4_Only_Pass_Only_Avg_Test_Time", Type = typeof(float?) },
        new Column { Name = "Site_4_Only_Pass_Only_Count", Type = typeof(int?) },
        new Column { Name = "Site_4_Count", Type = typeof(int?) },
        // Typed float? to match the other Site_N_Yield columns; yield values are
        // fractional (the sample data shows 92.9 for Site_1_Yield).
        new Column { Name = "Site_4_Yield", Type = typeof(float?) },
    };

    // Reader configuration: request null for fields missing from short rows, keep
    // empty lines, and route parse errors to Csv_ParseError instead of throwing.
    csv.MissingFieldAction = MissingFieldAction.ReplaceByNull;
    csv.SkipEmptyLines = false;
    csv.DefaultParseErrorAction = ParseErrorAction.RaiseEvent;
    csv.ParseError += Csv_ParseError;

    // Drive the loop from the declared schema rather than a hard-coded 54 so the
    // bound cannot drift out of sync with the column list above.
    int columnCount = csv.Columns.Count;

    while (csv.ReadNextRecord())
    {
        for (int i = 0; i < columnCount; i++)
        {
            // Read the field once; null or empty values are reported as MISSING.
            string value = csv[i];
            Console.Write("{0}. {1} |", i, string.IsNullOrEmpty(value) ? "MISSING" : value);
        }

        Console.WriteLine();
    }
}
处理缺失字段的事件处理程序:
/// <summary>
/// Parse-error event handler: when the reported error is a
/// <c>MissingFieldCsvException</c>, sets the event's action to
/// <c>ParseErrorAction.AdvanceToNextLine</c>. Any other error leaves
/// <c>e.Action</c> untouched.
/// </summary>
/// <param name="sender">The object that raised the event (unused).</param>
/// <param name="e">Event data carrying the error and the action to take.</param>
private static void Csv_ParseError(object sender, ParseErrorEventArgs e)
{
    bool isMissingField = e.Error is MissingFieldCsvException;
    if (!isMissingField)
    {
        // Not a missing-field problem: leave the action as it was set by the caller.
        return;
    }

    e.Action = ParseErrorAction.AdvanceToNextLine;
}
答案 0 :(得分:0)
你应该用 if (csv.count == 54) 把 for 循环包起来,这样无需进入循环就能判断该行是否有效;之后可以再用单独的 if 针对每个字段(例如 Delay_Code)分别处理错误,具体做法取决于你想要的逻辑。
答案 1 :(得分:0)
最后,我没有使用任何 CSV 库。我直接采用了 Variable Column CSV file processing C# 中的做法,效果非常好。我还创建了一个 DataTable,然后使用 SqlBulkCopy 将其写入服务器。