我首先检索表中的总行数(例如 100),然后把它们划分成若干块(例如每块 25 行)。接着我创建一个任务(taskFetch),它使用 Parallel.ForEach() 按块从 MyTable 中把行提取到 DataTable 中(每个 DataTable 包含 25 条记录)。其中还嵌套了另一个 Parallel.ForEach(),它使用 Partitioner.Create() 从每个 DataTable 中读取数据,并将其添加到 BlockingCollection(sourceCollection)中。出于测试目的,我在控制台上输出结果,这部分看起来是正常的。
但是,当我尝试从 sourceCollection 中取出数据时,却发现了重复的条目。实际上我的表中有超过 1,500,000 条记录。我并不认为这些重复条目是在添加时产生的,但我对自己取出数据的方式有些怀疑。
代码
/// <summary>
/// Reads every row of MyTable in fixed-size row-number ranges, converts each row into a
/// <see cref="MigrationObject"/>, queues the objects in a blocking collection and finally
/// drains the collection, printing each object to the console.
/// </summary>
/// <param name="clearPVK">Not used by this method.</param>
/// <param name="EncZPK">Not used by this method.</param>
/// <returns>A task that completes once every queued object has been printed.</returns>
public async Task BulkMigrationAsync(string clearPVK, string EncZPK)
{
    const int recordsInSet = 25;

    // Total number of rows in MyTable; the command must be bound to the connection it runs on.
    int totalRows;
    using (SqlConnection conn = new SqlConnection(_Database.ConnectionString))
    using (SqlCommand countCommand = new SqlCommand("SELECT COUNT(*) FROM MyTable", conn))
    {
        conn.Open();
        totalRows = Convert.ToInt32(countCommand.ExecuteScalar());
    }

    // Round up so that a final, partially filled chunk is not silently dropped
    // (e.g. 110 rows -> 5 chunks, the last one covering rows 101..110).
    int setsCount = (totalRows + recordsInSet - 1) / recordsInSet;

    // Inclusive 1-based row-number ranges, e.g. (1, 25), (26, 50), (51, 75), (76, 100).
    List<Tuple<int, int>> chunks = new List<Tuple<int, int>>(setsCount);
    for (int i = 0; i < setsCount; i++)
    {
        int rangeFrom = i * recordsInSet + 1;
        int rangeTo = Math.Min(rangeFrom + recordsInSet - 1, totalRows);
        chunks.Add(Tuple.Create(rangeFrom, rangeTo));
    }

    // The range bounds are passed as parameters; writing "chunk.Value.Item1" inside the
    // string literal would send that text to the server verbatim.
    // NOTE(review): the ranges returned by separate queries are only disjoint if
    // (RELATIONSHIP_NUM, CUST_ID) orders the rows uniquely and the table is not modified
    // while the migration runs - confirm.
    const string query = @"SELECT * FROM ( SELECT RELATIONSHIP_NUM, CUST_ID, CODE, BLOCK_NEW, ROW_NUMBER() over (
order by RELATIONSHIP_NUM, CUST_ID) as RowNum FROM MyTable) SUB
WHERE SUB.RowNum BETWEEN @RangeFrom AND @RangeTo";

    using (BlockingCollection<MigrationObject> sourceCollection = new BlockingCollection<MigrationObject>())
    {
        // Fetch the chunks in parallel and add the converted rows to sourceCollection.
        Task taskFetch = Task.Run(() =>
        {
            Parallel.ForEach(chunks, chunk =>
            {
                using (DataTable dt = new DataTable())
                {
                    // The using blocks close and dispose the connection; no extra finally is needed.
                    using (SqlConnection localConn = new SqlConnection(_Database.ConnectionString))
                    using (SqlCommand selectCommand = new SqlCommand(query, localConn))
                    using (SqlDataAdapter da = new SqlDataAdapter(selectCommand))
                    {
                        selectCommand.Parameters.Add("@RangeFrom", SqlDbType.Int).Value = chunk.Item1;
                        selectCommand.Parameters.Add("@RangeTo", SqlDbType.Int).Value = chunk.Item2;
                        da.Fill(dt);
                    }

                    // Partitioner.Create(0, 0) throws, so skip chunks that came back empty.
                    if (dt.Rows.Count == 0)
                    {
                        return;
                    }

                    Parallel.ForEach(Partitioner.Create(0, dt.Rows.Count), (range, state) =>
                    {
                        for (int i = range.Item1; i < range.Item2; i++)
                        {
                            DataRow row = dt.Rows[i];

                            // A fresh object per row. Re-using one instance for the whole range
                            // queues the same reference repeatedly, so every queued entry ends up
                            // showing the values of the last row written - the "duplicates".
                            MigrationObject migSource = new MigrationObject();
                            migSource.PAN = row["CUST_ID"].ToString();
                            migSource.PinOffset = row["CODE"].ToString();
                            migSource.PinBlockNew = row["BLOCK_NEW"].ToString();
                            migSource.RelationshipNum = row["RELATIONSHIP_NUM"].ToString();
                            Console.WriteLine(@"PAN " + migSource.PAN + " Rel " + migSource.RelationshipNum
                                + " for ranges : " + range.Item1 + " TO " + range.Item2);

                            // The collection is unbounded, so Add never blocks; unlike an
                            // unchecked TryAdd it cannot fail silently.
                            sourceCollection.Add(migSource);
                        }
                    });
                }
            });
        });

        try
        {
            await taskFetch;
        }
        finally
        {
            // Always mark the collection complete so the drain below terminates.
            sourceCollection.CompleteAdding();
        }

        // After CompleteAdding this enumerates the remaining items and then ends.
        // The per-item 50 ms delay of the earlier version was dropped: at 1,500,000 rows it
        // would add many hours without affecting the result.
        foreach (MigrationObject mig in sourceCollection.GetConsumingEnumerable())
        {
            Console.WriteLine(" Rel " + mig.RelationshipNum + " PAN " + mig.PAN);
        }
    }
}
答案 0 :(得分:0)
是我的疏忽,实际上出问题的地方是:
// Excerpt of the inner loop as originally written. It copies each row of dt into a
// MigrationObject and adds that object to sourceCollection.
Parallel.ForEach(Partitioner.Create(0, dt.Rows.Count),
(range, state) =>
{
// BUG: the object is created once per range, outside the for loop, so every iteration
// below overwrites the fields of this one instance and then adds it again.
MigrationObject migSource = new MigrationObject(); // creating the object outside For loop.
for (int i = range.Item1; i < range.Item2; i++)
{
migSource.PAN = dt.Rows[i]["CUST_ID"].ToString();
migSource.PinOffset = dt.Rows[i]["CODE"].ToString();
migSource.PinBlockNew = dt.Rows[i]["BLOCK_NEW"].ToString();
migSource.RelationshipNum = dt.Rows[i]["RELATIONSHIP_NUM"].ToString();
Console.WriteLine(@"PAN " + migSource.PAN + " Rel " + migSource.RelationshipNum
+ " for ranges : " + range.Item1 + " TO " + range.Item2);
// Assuming MigrationObject is a reference type (TODO confirm), this queues another
// reference to the shared instance rather than a snapshot of the current row.
sourceCollection.TryAdd(migSource);
}
});
相反,我应该把 'MigrationObject migSource = new MigrationObject();' 放到 for 循环内部:
// Corrected inner loop: each row of dt gets its own MigrationObject before it is queued.
Parallel.ForEach(Partitioner.Create(0, dt.Rows.Count), (range, state) =>
{
    int rowIndex = range.Item1;
    while (rowIndex < range.Item2)
    {
        DataRow row = dt.Rows[rowIndex];

        // Built inside the loop so every queued entry is a distinct instance.
        MigrationObject migSource = new MigrationObject
        {
            PAN = row["CUST_ID"].ToString(),
            PinOffset = row["CODE"].ToString(),
            PinBlockNew = row["BLOCK_NEW"].ToString(),
            RelationshipNum = row["RELATIONSHIP_NUM"].ToString()
        };

        // Diagnostic output: the row's identifiers and the partition range being processed.
        Console.WriteLine(@"PAN " + migSource.PAN + " Rel " + migSource.RelationshipNum
            + " for ranges : " + range.Item1 + " TO " + range.Item2);

        sourceCollection.TryAdd(migSource);
        rowIndex++;
    }
});