读取CSV文件,修剪/过滤数据,然后存储到C#中的数据库中

时间:2017-01-09 22:35:19

标签: c# asp.net .net csv lumenworks

我正在使用C#中的LumenWorks CsvReader读取CSV文件,并将其内容写入数据库表中对应的列。代码如下,它工作正常:

/// <summary>
/// Bulk-copies every CSV file returned by <c>GetFilesFromFolder</c> into the database
/// and then executes the follow-up stored procedure.
/// </summary>
/// <param name="context">
/// Context that supplies the connection string and is passed on to the file-lookup
/// and old-data clean-up helpers.
/// </param>
/// <remarks>
/// Exceptions raised while the connection is in use are reported through
/// <c>AddErrorMessage</c> and not rethrown. The default thread culture is restored on
/// every exit path, and the connection, commands and CSV readers are always disposed.
/// </remarks>
private void ButtonClick(DeliverData context)
{
    // Set culture for decimal cast; the previous default is put back in the finally block.
    CultureInfo previousCulture = CultureInfo.DefaultThreadCurrentCulture;
    CultureInfo.DefaultThreadCurrentCulture = new CultureInfo("en-US");
    DateTime start = DateTime.Now;

    try
    {
        Dictionary<Cycle, FileInfo> files = this.GetFilesFromFolder(context, this.FPath);

        // Initiate sql connection
        using (SqlConnection sqlConnection = new SqlConnection(context.Database.Connection.ConnectionString))
        {
            this.sqlElementsNumber = true;

            try
            {
                sqlConnection.Open();

                // Create temp table
                using (SqlCommand createTempTable = new SqlCommand(TempTables.DataTempTable, sqlConnection))
                {
                    createTempTable.ExecuteNonQuery();
                }

                this.DeleteOldData(context, files);

                foreach (KeyValuePair<Cycle, FileInfo> file in files)
                {
                    // First pass: count the records without keeping them in memory.
                    using (CsvReader counter = new CsvReader(new StreamReader(file.Value.FullName), false))
                    {
                        this.currentFilePath = file.Value.FullName;
                        this.bulkCopyElements = counter.Count();
                    }

                    // Second pass: stream the file into the destination table.
                    using (CsvReader reader = new CsvReader(new StreamReader(file.Value.FullName), false))
                    {
                        reader.Columns = new List<Column>
                        {
                            new Column { Name = "Sector", Type = typeof(string) },
                            new Column { Name = "Sub Sector", Type = typeof(string) },
                            new Column { Name = "Category", Type = typeof(string) },
                            new Column { Name = "Brand", Type = typeof(string) },
                            new Column { Name = "Property1", Type = typeof(string) },
                            new Column { Name = "Property2", Type = typeof(string) },
                            new Column { Name = "Property3", Type = typeof(string) },
                            new Column { Name = "Property4", Type = typeof(string) },
                            new Column { Name = "Property5", Type = typeof(string) }
                        };

                        reader.MoveTo(0);
                        reader.UseColumnDefaults = true;

                        // Sql Bulk Copy Code
                        using (var sbc = new SqlBulkCopy(sqlConnection))
                        {
                            sbc.DestinationTableName = "Tablename";
                            sbc.BatchSize = 1000;
                            sbc.EnableStreaming = true;
                            sbc.NotifyAfter = 100;
                            sbc.SqlRowsCopied += new SqlRowsCopiedEventHandler(this.HandleBulkMessage);

                            // NOTE(review): "Brand" is declared as a CSV column above but has no
                            // mapping here, so it is not copied - confirm this is intentional.
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sector", "Sector"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sub Sector", "SubSector"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Category", "Category"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property1", "Property1"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property2", "Property2"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property3", "Property3"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property4", "Property4"));
                            sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property5", "Property5"));

                            sbc.WriteToServer(reader);
                        }
                    }
                }

                // start stored procedure
                using (SqlCommand cmd = new SqlCommand("StoredprocedureName", sqlConnection))
                {
                    cmd.CommandType = CommandType.StoredProcedure;
                    cmd.CommandTimeout = 43200; // seconds (12 hours)

                    SqlInfoMessageEventHandler handler = new SqlInfoMessageEventHandler(this.HandleSqlMessage);
                    sqlConnection.FireInfoMessageEventOnUserErrors = true;
                    sqlConnection.InfoMessage += handler;

                    try
                    {
                        cmd.ExecuteNonQuery();
                    }
                    catch (SqlException e)
                    {
                        this.AddErrorMessage(e.Message, e);
                    }
                    finally
                    {
                        // Always detach, even when something other than SqlException escapes.
                        sqlConnection.FireInfoMessageEventOnUserErrors = false;
                        sqlConnection.InfoMessage -= handler;
                    }
                }

                sqlConnection.Close();

                // reset culture before logging, as the success path always did
                CultureInfo.DefaultThreadCurrentCulture = previousCulture;
                Logger.Write(new CustomLogEntry(this.ErrorTitle, CustomLogEntry.LogPriority.High, "Time for File insert: " + DateTime.Now.Subtract(start).ToString(), this.ForecastPath));
            }
            catch (Exception e)
            {
                // Close before reporting so the connection is released first.
                sqlConnection.Close();
                this.AddErrorMessage(e.Message, e);
            }
        }
    }
    finally
    {
        // Restore the culture on failure paths too; harmless repeat on success.
        CultureInfo.DefaultThreadCurrentCulture = previousCulture;
    }
}

CSV文件看起来像这样

CSV file looks like this

现在我想在把数据写入数据库之前先过滤CSV中的数据。例如,我需要从所有列中删除“ID”关键字,并从所有属性列中删除“PPTR*_”前缀(* 为属性编号,例如 PPTR1_)。凡是没有数据的地方,都需要写入 'nodata'。

我的问题是实现这一目标的不同方法是什么?如何?

1 个答案:

答案 0 :(得分:1)

如果要在将数据输入数据库之前更新数据并且CSV文件不大,请考虑将数据加载到数据表中

// Buffer the CSV rows in a DataTable so they can be edited before the bulk copy.
//CsvReader reader = null;
DataTable tblCSV = new DataTable("CSV");
...
reader = new CsvReader(new StreamReader(file.Value.FullName), false);
...
tblCSV.Load(reader); 

// Clean each loaded row; iterate the table that was just filled (tblCSV).
foreach (DataRow dr in tblCSV.Rows)
{
   dr["Sector"] = dr["Sector"].ToString().Replace(" ID", ""); 
}

...
// Copy the edited table instead of streaming straight from the reader.
//sbc.WriteToServer(reader);
sbc.WriteToServer(tblCSV);

请注意,对于大型文件,这种方式可能会很慢;在数据加载到数据库之后再更新数据可能更合理,特别是因为您已经有一段在加载之后运行的代码(存储过程)。可以在StoredprocedureName的开头添加相应的UPDATE语句。

示例:

-- Remove every ' ID' occurrence from the Sector column.
UPDATE Tablename
   SET Sector = REPLACE(Sector, ' ID', '');

如果Tablename还包含不应再次更新的其他数据 - 请添加一个时间戳列,该列可以帮助您识别新加载的数据,或者使用另一个表,例如TablenameTemp,在那里加载数据,修改并将所有内容移动到Tablename。

更新

如果StoredprocedureName仅用于该单一目的(不从其他代码调用),则只需在其代码的开头添加更新查询,例如

-- Clean the freshly loaded rows before the rest of the procedure runs.
-- Column names follow the bulk-copy destination columns used by the loader
-- (Property1 .. Property5).
ALTER PROCEDURE StoredprocedureName
AS
BEGIN
UPDATE Tablename 
   SET Sector=Replace(Sector, ' ID', ''),
   Property1=Replace(Property1, 'PPTR1_', ''),
   Property2=Replace(Property2, 'PPTR2_', ''),
   ...

...rest of sql code

注意,Replace()会替换所有出现的匹配项,因此请确认这样的结果符合预期,否则您需要更复杂的逻辑来去除指定的字符串。在把SQL放入存储过程之前,可以先直接在数据库中测试它。