我有一个程序使用SqlBulkCopy
将巨大的DataTable
(2.000.000到70.000.000行,具体取决于配置)写入数据库。
我决定将填充此表格的循环更改为IDataReader
,因为行数通常会导致OutOfMemoryException
。
表格填充如下
// Inputs, declared elsewhere:
// int[] firsts;
// string[] seconds;
// byte[] thirds;
DataTable table = new DataTable();
foreach (int first in firsts)
{
    foreach (string second in seconds)
    {
        foreach (byte third in thirds)
        {
            // One record per (first, second, third) combination.
            DataRow record = table.NewRow();
            record[0] = first;
            record[1] = second;
            record[2] = third;
            table.Rows.Add(record);
        }
    }
    // here I also bulk load the table and clear it
}
所以在我的IDataReader
类中,我将按索引循环。这是我的尝试。
/// <summary>
/// Walks every (first, second, third) combination by index, with the third
/// index varying fastest, and exposes the current combination in
/// <c>Values</c>. Only the row-advancing part of the reader is shown here.
/// </summary>
class TableReader : IDataReader
{
    // Set once the last combination has been handed out.
    bool Eof = false;
    int FirstIndex;
    int SecondIndex;
    int ThirdIndex;
    //those are populated via constructor
    int[] firsts;
    string[] seconds;
    byte[] thirds;
    // this will be retrieved automatically via indexer
    // Allocated up front so Read() always has a buffer to write into.
    object[] Values = new object[3];

    /// <summary>
    /// Advances to the next combination and stores it in <c>Values</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <c>Values</c> holds a new row; <c>false</c> once every
    /// combination has been produced, or when any input array is empty.
    /// </returns>
    public bool Read()
    {
        if (Eof)
        {
            return false;
        }

        // An empty dimension makes the whole product empty.
        if (firsts.Length == 0 || seconds.Length == 0 || thirds.Length == 0)
        {
            Eof = true;
            return false;
        }

        // Carry index overflow from the fastest dimension to the slowest
        // within this single call, so a rollover never yields a stale row.
        if (ThirdIndex == thirds.Length)
        {
            ThirdIndex = 0;
            SecondIndex++;
        }
        if (SecondIndex == seconds.Length)
        {
            SecondIndex = 0;
            FirstIndex++;
        }
        if (FirstIndex == firsts.Length)
        {
            Eof = true;
            return false;
        }

        Values[0] = firsts[FirstIndex];
        Values[1] = seconds[SecondIndex];
        // Post-increment: the following call starts at the next third value.
        Values[2] = thirds[ThirdIndex++];
        return true;
    }
}
我已经使用while(true)
循环创建了此代码,其中使用break而不是Eof
,但我似乎无法弄清楚如何执行此操作。
任何人都可以提供帮助吗?
答案 0 :(得分:0)
如果您实现IDataReader并使用“yield return”关键字来提供行,这实际上是可行的。 IDataReader实现起来有点痛苦,但它并不复杂。下面的代码可以调整为将数TB的数据加载到数据库中,永远不会耗尽内存。
因为没有DataTable对象来表示列,所以我必须自己分别存储数据类型和列名。
/// <summary>
/// Forward-only <see cref="IDataReader"/> that produces one row for every
/// (first, second, third) combination of its three source arrays. Rows are
/// generated lazily by an iterator, so only the current row is held.
/// </summary>
class TestDataReader : IDataReader {
    readonly int[] firsts = { 1, 2, 3, 4 };
    readonly string[] seconds = { "abc", "def", "ghi" };
    readonly byte[] thirds = { 0x30, 0x31, 0x32 };

    // The data types of each column.
    readonly Type[] dataTypes = { typeof(int), typeof(string), typeof(byte) };

    // The names of each column.
    readonly string[] names = { "firsts", "seconds", "thirds" };

    // Set by Dispose() so IsClosed reports the real state.
    bool closed;

    // This function uses coroutines to turn the "push" approach into a "pull" approach.
    private IEnumerable<object[]> GetRows() {
        // The same array instance is yielded for every row; its contents are
        // overwritten on the next step, so values must be read before then.
        object[] row = new object[3];

        foreach (var f in firsts) {
            foreach (var s in seconds) {
                foreach (var t in thirds) {
                    row[0] = f;
                    row[1] = s;
                    row[2] = t;
                    yield return row;
                }
            }
        }
    }

    // Everything below basically wraps this.
    readonly IEnumerator<object[]> rowProvider;

    /// <summary>Creates the reader positioned before the first row.</summary>
    public TestDataReader() {
        rowProvider = GetRows().GetEnumerator();
    }

    /// <summary>Gets the value of column <paramref name="i"/> in the current row.</summary>
    public object this[int i] {
        get {
            return GetValue(i);
        }
    }

    /// <summary>Gets the value of the named column in the current row.</summary>
    public object this[string name] {
        get {
            return GetValue(GetOrdinal(name));
        }
    }

    public int Depth { get { return 0; } }
    public int FieldCount { get { return dataTypes.Length; } }

    /// <summary>True once <see cref="Close"/> or <see cref="Dispose"/> has run.</summary>
    public bool IsClosed { get { return closed; } }
    public int RecordsAffected { get { return 0; } }

    /// <summary>Closes the reader; equivalent to <see cref="Dispose"/>.</summary>
    public void Close() { Dispose(); }

    /// <summary>Disposes the row enumerator and marks the reader as closed.</summary>
    public void Dispose() {
        closed = true;
        rowProvider.Dispose();
    }

    public string GetDataTypeName(int i) { return dataTypes[i].Name; }
    public Type GetFieldType(int i) { return dataTypes[i]; }

    // These functions get basic data types. Each is a direct cast of the boxed
    // value, so asking for a type other than the column's own type throws
    // InvalidCastException.
    public bool GetBoolean(int i) { return (bool) rowProvider.Current[i]; }
    public byte GetByte(int i) { return (byte) rowProvider.Current[i]; }
    public char GetChar(int i) { return (char) rowProvider.Current[i]; }
    public DateTime GetDateTime(int i) { return (DateTime) rowProvider.Current[i]; }
    public decimal GetDecimal(int i) { return (decimal) rowProvider.Current[i]; }
    public double GetDouble(int i) { return (double) rowProvider.Current[i]; }
    public float GetFloat(int i) { return (float) rowProvider.Current[i]; }
    public Guid GetGuid(int i) { return (Guid) rowProvider.Current[i]; }
    public short GetInt16(int i) { return (short) rowProvider.Current[i]; }
    public int GetInt32(int i) { return (int) rowProvider.Current[i]; }
    public long GetInt64(int i) { return (long) rowProvider.Current[i]; }
    public string GetString(int i) { return (string) rowProvider.Current[i]; }
    public object GetValue(int i) { return (object) rowProvider.Current[i]; }
    public string GetName(int i) { return names[i]; }

    /// <summary>True when column <paramref name="i"/> holds null or <see cref="DBNull"/>.</summary>
    public bool IsDBNull(int i) {
        object obj = rowProvider.Current[i];
        return obj == null || obj is DBNull;
    }

    /// <summary>Looks up a field number given its name (case-insensitive).</summary>
    /// <exception cref="IndexOutOfRangeException">
    /// No column has the given name. Thrown here instead of returning -1 so
    /// the failure names the missing column.
    /// </exception>
    public int GetOrdinal(string name) {
        int ordinal = Array.FindIndex(names, x => x.Equals(name, StringComparison.OrdinalIgnoreCase));
        if (ordinal < 0) {
            throw new IndexOutOfRangeException("No column named '" + name + "'.");
        }
        return ordinal;
    }

    /// <summary>Copies the current row into <paramref name="values"/>.</summary>
    /// <returns>The number of values copied; 0 when <paramref name="values"/> is null.</returns>
    public int GetValues(object[] values) {
        if (values == null) {
            return 0;
        } else {
            // Copy no more than fits in the caller's array.
            int len = Math.Min(values.Length, rowProvider.Current.Length);
            Array.Copy(rowProvider.Current, values, len);
            return len;
        }
    }

    // This reader only supports a single result set.
    public bool NextResult() {
        return false;
    }

    /// <summary>Moves to the next row.</summary>
    /// <returns><c>false</c> once the rows are exhausted.</returns>
    public bool Read() {
        return rowProvider.MoveNext();
    }

    // Don't bother implementing these in any meaningful way.
    public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) {
        throw new NotImplementedException();
    }

    public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) {
        throw new NotImplementedException();
    }

    public IDataReader GetData(int i) {
        throw new NotImplementedException();
    }

    // No schema table is built; callers receive null.
    public DataTable GetSchemaTable() {
        return null;
    }
}