我有一个Web应用程序,我在其中解析一个可能有超过200k记录的csv文件。我解析每一行的信息,验证数据库中不存在该键,然后将其添加到上下文中。当计数达到10,000条记录时,它会调用SaveChanges例程。问题是在上下文中可能存在重复,并且错误输出。这是在与Azure SQL服务器通信的Azure VM上运行。
两个问题,我如何处理重复的问题?有什么方法可以提高速度,因为它需要几个小时才能运行?
using (LoanFileEntities db = new LoanFileEntities())
{
db.Configuration.AutoDetectChangesEnabled = false; // 1. this is a huge time saver
db.Configuration.ValidateOnSaveEnabled = false; // 2. this can also save time
while (parser.Read())
{
counter++;
int loan_code = 0;
string loan_code_string = parser["LoanId"];
string dateToParse = parser["PullDate"].Trim();
DateTime date_pulled;
try
{
date_pulled = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Pull Date for line " + counter);
}
string originationdate = parser["OriginationDate"].Trim();
DateTime date_originated;
try
{
date_originated = DateTime.Parse(originationdate, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Origination Date for line " + counter);
}
dateToParse = parser["DueDate"].Trim();
DateTime date_due;
try
{
date_due = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Due Date for line " + counter);
}
string region = parser["Region"].Trim();
string source = parser["Channel"].Trim();
string password = parser["FilePass"].Trim();
decimal principalAmt = Convert.ToDecimal(parser["Principal"].Trim());
decimal totalDue = Convert.ToDecimal(parser["TotalDue"].Trim());
string vitaLoanId = parser["VitaLoanId"];
var toAdd =
db.dfc_LoanRecords.Any(
x => x.loan_code_string == loan_code_string);
if (!toAdd)
{
dfc_LoanRecords loan = new dfc_LoanRecords();
loan.loan_code = loan_code;
loan.loan_code_string = loan_code_string;
loan.loan_principal_amt = principalAmt;
loan.loan_due_date = date_due;
loan.date_pulled = date_pulled;
loan.date_originated = date_originated;
loan.region = region;
loan.source = source;
loan.password = password;
loan.loan_amt_due = totalDue;
loan.vitaLoanId = vitaLoanId;
loan.load_file = fileName;
loan.load_date = DateTime.Now;
switch (loan.region)
{
case "UK":
if (location.Equals("UK"))
{
//db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_uk_online++;
}
else
{
counter_new_uk_retail++;
}
}
break;
case "US":
if (location.Equals("US"))
{
db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_us_online++;
}
else
{
counter_new_us_retail++;
}
}
break;
case "Canada":
if (location.Equals("US"))
{
db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_cn_online++;
}
else
{
counter_new_cn_retail++;
}
}
break;
}
// delay save to speed up load. 3. also saves transactional time
if (counter % 10000 == 0)
{
db.SaveChanges();
}
}
} // end of parser read
db.SaveChanges();
}
}
}
答案 0 :(得分:1)
我建议删除代码 之前的重复项 将其发送到.SaveChanges()。
我没有详细介绍重复删除,而是将这个链接列表放在StackOverflow的现有问题和答案中,这可能有所帮助:
希望有所帮助!