我有一项任务:要在 2 个表中生成并插入大约 1 000 000 行数据。难点在于必须填入已经存在的外键,所以我先从其他表中读取数据,然后生成一个包含 1 000 000 行的新 DataTable,再用 SqlBulkCopy 批量写入……但是生成过程耗时太长(每分钟只有大约 4000 行!)。下面是代码,也许有人可以帮忙优化它……
/// <summary>
/// Fills two in-memory tables ("Datetime" rows and "Overspeed" fact rows) with generated data.
/// The date range is processed in five consecutive segments; for each segment a number of
/// unique random timestamps is drawn and one row per timestamp is appended to both tables
/// via <c>InsertFact</c>.
/// </summary>
/// <param name="fromDate">Start of the date range, passed to <c>GetFifthDate</c>.</param>
/// <param name="toDate">End of the date range, passed to <c>GetFifthDate</c>.</param>
/// <param name="Size">Total row count, passed to <c>GetFifthCount</c> to get the per-segment counts.</param>
public void GenerateData(DateTime fromDate, DateTime toDate, int Size)
{
    DataTable theDatetime = new DataTable();
    theDatetime.Columns.Add("Datetime", typeof(DateTime));
    theDatetime.Columns.Add("Date", typeof(DateTime));
    theDatetime.Columns.Add("Hour", typeof(int));

    DataTable theOverspeed = new DataTable();
    theOverspeed.Columns.Add("CarNumber", typeof(string));
    theOverspeed.Columns.Add("Datetime", typeof(DateTime));
    theOverspeed.Columns.Add("DistrictCode", typeof(int));
    theOverspeed.Columns.Add("MarkCode", typeof(int));
    theOverspeed.Columns.Add("OwnerCode", typeof(int));
    theOverspeed.Columns.Add("RecognitorNumber", typeof(int));
    theOverspeed.Columns.Add("Region", typeof(int));
    theOverspeed.Columns.Add("RouteCode", typeof(int));
    theOverspeed.Columns.Add("Overspeed", typeof(int));
    theOverspeed.Columns.Add("Speed", typeof(int));

    // Only membership matters, so a set replaces the former Dictionary<DateTime, DateTime>.
    HashSet<DateTime> Dates = new HashSet<DateTime>();

    // NOTE(review): TheGenerator and sizes are not used in the visible part of this method;
    // they are kept because the rest of the method is not shown and may rely on them.
    Random TheGenerator = new Random();
    DataTable theCars = getCars();
    DataTable theRecognitors = getRecognitors();
    int[] sizes = new int[5];

    // Both helpers depend only on the method arguments, so they are evaluated once instead of
    // on every step. Assumes they are side-effect free — TODO confirm against their definitions.
    var segmentBounds = GetFifthDate(fromDate, toDate);
    var segmentCounts = GetFifthCount(Size);

    for (int step = 0; step < 5; ++step)
    {
        DateTime Current1 = segmentBounds[step];
        DateTime Current2 = segmentBounds[step + 1];
        int CurrentCount = segmentCounts[step];

        // Keep drawing until CurrentCount *unique* timestamps have been produced.
        // NOTE(review): if GetRandomDate cannot yield CurrentCount distinct values for this
        // segment, this loop never terminates, and it slows down as the segment fills up.
        int generated = 0;
        while (generated < CurrentCount)
        {
            var aDate = GetRandomDate(Current1, Current2);

            // HashSet.Add returns false for a duplicate, so one lookup covers test and insert.
            if (Dates.Add(aDate))
            {
                InsertFact(theCars, aDate, theRecognitors,
                    theDatetime, theOverspeed);
                ++generated;
            }
        }
    }
    // NOTE(review): the posted excerpt ends here; the remainder of the method (presumably the
    // bulk insert of both tables) and its closing brace are not shown.
以下是 InsertFact 方法:
// One generator shared by all calls. Creating a new Random seeded with
// DateTime.Now.Millisecond on every call allows only 1000 distinct seeds, and calls made
// within the same millisecond produce identical "random" values.
// NOTE: System.Random is not thread-safe; this assumes InsertFact is called from one thread.
private static readonly Random FactGenerator = new Random();

/// <summary>
/// Appends one row to <paramref name="theDatetime"/> and one row to
/// <paramref name="theOverspeed"/> for the given timestamp, using a randomly chosen car and
/// a randomly chosen recognitor as the source of the foreign-key values.
/// </summary>
/// <param name="theCars">Car rows; columns 1-4 are read by position.</param>
/// <param name="theDate">Timestamp written to both new rows.</param>
/// <param name="theRecognitor">Recognitor rows; columns 1-3 are read by position.</param>
/// <param name="theDatetime">Target table with "DateTime", "Date" and "Hour" columns.</param>
/// <param name="theOverspeed">Target table receiving the fact row.</param>
private void InsertFact(DataTable theCars,
    DateTime theDate,
    DataTable theRecognitor, DataTable theDatetime, DataTable theOverspeed)
{
    DataRow rowDate = theDatetime.NewRow();
    rowDate["DateTime"] = theDate;
    rowDate["Date"] = theDate.Date;
    rowDate["Hour"] = theDate.Hour;
    theDatetime.Rows.Add(rowDate);

    // Resolve the two source rows once instead of indexing the row collections per column.
    DataRow carRow = theCars.Rows[FactGenerator.Next(theCars.Rows.Count)];
    DataRow recognitorRow = theRecognitor.Rows[FactGenerator.Next(theRecognitor.Rows.Count)];

    // NOTE(review): the positional indexes below depend on the column order returned by the
    // "Select *" queries that fill these tables — verify against the table definitions.
    DataRow rowOverspeed = theOverspeed.NewRow();
    rowOverspeed["CarNumber"] = carRow[4];
    rowOverspeed["Datetime"] = theDate;
    rowOverspeed["DistrictCode"] = recognitorRow[3];
    rowOverspeed["MarkCode"] = carRow[3];
    rowOverspeed["OwnerCode"] = carRow[2];
    rowOverspeed["RecognitorNumber"] = recognitorRow[2];
    rowOverspeed["Region"] = carRow[1];
    rowOverspeed["RouteCode"] = recognitorRow[1];
    rowOverspeed["Overspeed"] = FactGenerator.Next(10, 40); // 10..39 inclusive
    rowOverspeed["Speed"] = FactGenerator.Next(100) > 40 ? 70 : 90;
    theOverspeed.Rows.Add(rowOverspeed);
}
GetRecognitors
/// <summary>
/// Loads every row of the <c>Recognitor</c> table into an in-memory table named "Recognitors".
/// </summary>
/// <returns>A <see cref="DataTable"/> holding the result of "Select * from Recognitor".</returns>
private DataTable getRecognitors()
{
    DataTable Result = new DataTable("Recognitors");

    // str is defined outside this block; presumably the connection string — verify.
    // The command and adapter are IDisposable too, so they are disposed with the connection.
    using (SqlConnection theConnection = new SqlConnection(str))
    using (SqlCommand theCommand = new SqlCommand("Select * from Recognitor", theConnection))
    using (SqlDataAdapter theAdapter = new SqlDataAdapter(theCommand))
    {
        theCommand.CommandType = System.Data.CommandType.Text;
        theCommand.CommandTimeout = 0; // 0 = wait indefinitely for the query to finish
        theConnection.Open();
        theAdapter.Fill(Result);
    }

    return Result;
}
GetCars
/// <summary>
/// Loads every row of the <c>Car</c> table into an in-memory table named "Cars".
/// </summary>
/// <returns>A <see cref="DataTable"/> holding the result of "Select * from Car".</returns>
private DataTable getCars()
{
    DataTable Result = new DataTable("Cars");

    // str is defined outside this block; presumably the connection string — verify.
    // The command and adapter are IDisposable too, so they are disposed with the connection.
    using (SqlConnection theConnection = new SqlConnection(str))
    using (SqlCommand theCommand = new SqlCommand("Select * from Car", theConnection))
    using (SqlDataAdapter theAdapter = new SqlDataAdapter(theCommand))
    {
        theCommand.CommandType = System.Data.CommandType.Text;
        theCommand.CommandTimeout = 0; // 0 = wait indefinitely for the query to finish
        theConnection.Open();
        theAdapter.Fill(Result);
    }

    return Result;
}
答案 0 :(得分:2)
在我看来,你的代码中有太多空转的循环迭代(没有产生任何数据的迭代),而且随着数据不断生成,这类迭代的数量还会增加。看看这段循环:
// Excerpt of the generation loop: draws a random date per iteration and only
// counts the iteration when the date has not been seen before.
for (int i = 0; i < CurrentCount; ++i)
{
var aDate = GetRandomDate(Current1, Current2);
if (!Dates.ContainsKey(aDate))
{
// insert
}
else
{
// Duplicate date: undo the loop increment so this iteration is retried.
i--;
}
}
您生成的数据越多,条件 `Dates.ContainsKey` 评估为真的次数就越多。由于生成数字的正态分布,生成唯一数字所需的时间以非线性方式增加。
您肯定应该修改用于生成日期的方法。
P.S. `Dictionary<DateTime, DateTime> Dates` => `HashSet<DateTime>`。