I'm processing a large CSV data file that's being parsed into an object graph like this...
Company
- Address
- References []
  - Ref 1
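For clarity, the shape being mapped to looks roughly like this (a minimal sketch; property names other than IsActive, Address, and References are illustrative). Each of these types is decorated with a [Table] attribute, which the copier implementation below relies on:

class Company
{
    public bool IsActive { get; set; }
    public Address Address { get; set; }
    public List<CompanyReference> References { get; set; }
    // ...plus the flat value-type/string properties that become SQL columns
}

Address and CompanyReference are similar flat DTOs.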
It's a custom parser that reads the source file, roughly 2GB of data, line by line.
The end result is that the parsed objects are split into 3 distinct sets and handed off to the SqlBulkCopy class for insertion into SQL Server.
Over time, SQL gets progressively slower, until eventually it times out.
I could increase the timeout, but all that does is push the exception further out and make it take longer to arrive... the process gets further along, but it still hits the wall in the end.
Is there a way to enforce some kind of constant-performance rule on the connection / on SqlBulkCopy / in SQL Server, something that gives me a "fixed response time"? All my batches are a known, fixed size, and each operation just repeats the same work with more data.
I expect the total row count for the whole operation to be around 60 million.
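To put numbers on it: at a fixed batch size of, say, 10,000 (illustrative; batchSize is a variable in the code below), 60 million rows means roughly 6,000 repetitions of an identically-sized operation, so I'd expect each repetition to cost about the same.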
Here's how I'm doing it...
// (This runs inside an async method; connStr, batchSize, and the counters are defined above.)
using (var parser = new MappedCSVParser<Company>(config, map, new BufferedStream(File.Open(sourceFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))))
{
    var data = parser.Parse();

    // Pull the first batch, keeping only active companies.
    Company[] companies = data.Take(batchSize).Where(c => c.IsActive).ToArray();
    Address[] addresses = companies.Select(c => c.Address).ToArray();
    CompanyReference[] refs = companies.SelectMany(c => c.References).ToArray();

    using (var conn = new SqlConnection(connStr))
    {
        using (var addCopier = new SqlBulkCopier<Address>(conn))
        using (var comCopier = new SqlBulkCopier<Company>(conn))
        using (var refCopier = new SqlBulkCopier<CompanyReference>(conn))
        {
            var addressReader = new ObjectDataReader<Address>();
            var companyReader = new ObjectDataReader<Company>();
            var refReader = new ObjectDataReader<CompanyReference>();

            await conn.OpenAsync();

            while (companies.Any())
            {
                // Point the readers at the current batch and bulk-copy all three sets.
                addressReader.SetSource(addresses);
                companyReader.SetSource(companies);
                refReader.SetSource(refs);

                await addCopier.WriteToServer(addressReader);
                await comCopier.WriteToServer(companyReader);
                await refCopier.WriteToServer(refReader);

                batchesSent++;
                companiesSent += companies.Length;
                rejections += batchSize - companies.Length;
                Log($"Progress - Batches Sent: {batchesSent}, Companies Sent: {companiesSent}, Rejected: {rejections}");

                // Advance to the next batch; Take() continues from where the parser left off.
                companies = data.Take(batchSize).Where(c => c.IsActive).ToArray();
                addresses = companies.Select(c => c.Address).ToArray();
                refs = companies.SelectMany(c => c.References).ToArray();
            }
            conn.Close();
        }
    }
}
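For context, Parse() yields rows lazily off a single forward-only reader, which is why calling Take(batchSize) repeatedly advances through the file rather than restarting it each time. Roughly this shape (simplified; MapLine stands in for the real row-to-object mapping):

IEnumerable<Company> Parse()
{
    // reader is a shared StreamReader over the BufferedStream passed to the
    // constructor; every enumeration continues from the current stream position.
    string line;
    while ((line = reader.ReadLine()) != null)
        yield return MapLine(line);
}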
EDIT:
As requested, here's my SqlBulkCopy implementation...
using System;
using System.ComponentModel.DataAnnotations.Schema;
using System.Data.SqlClient;
using System.Linq;
using System.Threading.Tasks;

class SqlBulkCopier<T> : IDisposable
{
    SqlBulkCopy copier;
    SqlConnection connection;
    bool internalConnection = false;

    public SqlBulkCopier(string connStr) : this(new SqlConnection(connStr))
    {
        internalConnection = true;
    }

    public SqlBulkCopier(SqlConnection conn)
    {
        connection = conn;
        var flags = SqlBulkCopyOptions.KeepNulls | SqlBulkCopyOptions.CheckConstraints | SqlBulkCopyOptions.TableLock | SqlBulkCopyOptions.KeepIdentity | SqlBulkCopyOptions.UseInternalTransaction;
        copier = new SqlBulkCopy(connection, flags, null);
        copier.BulkCopyTimeout = 180;

        // Map every value-type/string property straight across by name.
        foreach (var f in typeof(T).GetProperties().Where(p => p.PropertyType.IsValueType || p.PropertyType == typeof(string)))
            copier.ColumnMappings.Add(f.Name, f.Name);

        // The destination table comes from the [Table] attribute on T.
        var tableInfo = typeof(T).GetCustomAttributes(typeof(TableAttribute), true)[0] as TableAttribute;
        copier.DestinationTableName = $"{tableInfo.Schema ?? "dbo"}.{tableInfo.Name}";
    }

    public Task Connect() { return connection.OpenAsync(); }

    public void Disconnect() { connection.Close(); }

    public Task WriteToServer(ObjectDataReader<T> reader)
    {
        return copier.WriteToServerAsync(reader);
    }

    public void Dispose()
    {
        if (copier != null)
        {
            if (connection.State == System.Data.ConnectionState.Open) connection.Close();
            if (internalConnection) connection.Dispose();
            copier = null;
            connection = null;
        }
    }
}
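For completeness, the string-based constructor manages its own connection, so standalone usage looks like this (addresses here is any batch of mapped objects; the main loop above uses the shared-connection constructor instead):

using (var copier = new SqlBulkCopier<Address>(connStr))
{
    await copier.Connect();
    var reader = new ObjectDataReader<Address>();
    reader.SetSource(addresses);
    await copier.WriteToServer(reader);
    copier.Disconnect(); // Dispose would close it anyway; shown for symmetry
}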