嵌套循环到IDataReader

时间:2017-03-23 15:11:24

标签: c#

我有一个程序,先把数据填充到DataTable中,再通过SqlBulkCopy把大量数据(2,000,000到70,000,000行,具体取决于配置)批量写入数据库。

由于行数过多时经常导致OutOfMemoryException,我决定把填充该表的循环改写为一个IDataReader实现。

表格填充如下

// Source arrays, declared elsewhere:
//   int[] firsts;
//   string[] seconds;
//   byte[] thirds;
DataTable table = new DataTable();
for (int fi = 0; fi < firsts.Length; fi++)
{
    for (int si = 0; si < seconds.Length; si++)
    {
        for (int ti = 0; ti < thirds.Length; ti++)
        {
            // One row per (first, second, third) combination.
            DataRow record = table.NewRow();
            record[0] = firsts[fi];
            record[1] = seconds[si];
            record[2] = thirds[ti];
            table.Rows.Add(record);
        }
    }
    // (omitted) the table is bulk loaded and cleared here, once per element of firsts
}

所以在我的IDataReader类中,我将按索引循环。这是我的尝试。

/// <summary>
/// Produces one row per (first, second, third) combination by walking the three
/// arrays by index, in the same order as three nested foreach loops would
/// (thirds varies fastest, firsts slowest).
/// </summary>
class TableReader : IDataReader
{
    // Set once the last combination has been handed out.
    bool Eof = false;

    // Indices of the NEXT combination that Read() will emit.
    int FirstIndex;
    int SecondIndex;
    int ThirdIndex;

    // Populated via the constructor (not shown in this snippet).
    int[] firsts;
    string[] seconds;
    byte[] thirds;

    // Current row; this will be retrieved automatically via indexer.
    // Allocated up front (one slot per column) so Read() can always fill it.
    object[] Values = new object[3];

    /// <summary>
    /// Advances to the next combination and stores it in <c>Values</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <c>Values</c> now holds a new row; <c>false</c> once every
    /// combination has been returned (or when any of the arrays is empty).
    /// </returns>
    public bool Read()
    {
        if (Eof)
        {
            return false;
        }

        // An empty array means the nested loops would never execute their body.
        if (firsts.Length == 0 || seconds.Length == 0 || thirds.Length == 0)
        {
            Eof = true;
            return false;
        }

        // Carry like an odometer BEFORE emitting, so that a call which rolls an
        // index over still produces a row instead of returning true with stale data.
        if (ThirdIndex >= thirds.Length)
        {
            ThirdIndex = 0;
            SecondIndex++;
        }
        if (SecondIndex >= seconds.Length)
        {
            SecondIndex = 0;
            FirstIndex++;
        }
        if (FirstIndex >= firsts.Length)
        {
            Eof = true;
            return false;
        }

        Values[0] = firsts[FirstIndex];
        Values[1] = seconds[SecondIndex];
        Values[2] = thirds[ThirdIndex++];
        return true;
    }
}

我之前尝试过用while(true)循环配合break(而不是Eof标志)来编写这段代码,但我似乎仍然无法弄清楚该如何正确实现。

任何人都可以提供帮助吗?

1 个答案:

答案 0 :(得分:0)

如果您实现IDataReader并使用“yield return”关键字来提供行,这实际上是可行的。 IDataReader实现起来有点痛苦,但它并不复杂。下面的代码可以调整为将数TB的数据加载到数据库中,永远不会耗尽内存。

  • 我将DataRow对象替换为在整个数据读取过程中重复使用的单个对象数组。
  • 因为没有DataTable对象来表示列,所以我必须自己分别存储数据类型和列名。

    /// <summary>
    /// Forward-only <see cref="IDataReader"/> that streams every combination of
    /// <c>firsts</c>, <c>seconds</c> and <c>thirds</c> as a three-column row.
    /// Rows are generated lazily by an iterator, so only one row buffer exists
    /// at any time regardless of how many rows are produced.
    /// </summary>
    class TestDataReader : IDataReader {
        readonly int[] firsts = { 1, 2, 3, 4 };
        readonly string[] seconds = { "abc", "def", "ghi" };
        readonly byte[] thirds = { 0x30, 0x31, 0x32 };

        // The data types of each column.
        readonly Type[] dataTypes = { typeof(int), typeof(string), typeof(byte) };

        // The names of each column.
        readonly string[] names = { "firsts", "seconds", "thirds" };

        // This function uses coroutines to turn the "push" approach into a "pull" approach.
        private IEnumerable<object[]> GetRows() {
            // The same array is re-used for every row, so a consumer must copy
            // the values out before the next Read() if it needs to keep them.
            object[] row = new object[3];
            foreach (var f in firsts) {
                foreach (var s in seconds) {
                    foreach (var t in thirds) {
                        row[0] = f;
                        row[1] = s;
                        row[2] = t;
                        yield return row;
                    }
                }
            }
        }

        // Everything below basically wraps this.
        readonly IEnumerator<object[]> rowProvider;

        // True while the last Read() returned true, i.e. a row is available.
        bool hasRow;

        // True once Close()/Dispose() has been called.
        bool closed;

        public TestDataReader() {
            rowProvider = GetRows().GetEnumerator();
        }

        // The row produced by the last successful Read().
        // Throws InvalidOperationException when no row is available (before the
        // first Read(), after Read() returned false, or after Close()).
        private object[] CurrentRow {
            get {
                if (!hasRow) {
                    throw new InvalidOperationException("No current row: call Read() first and check that it returned true.");
                }
                return rowProvider.Current;
            }
        }

        public object this[int i] {
            get {
                return GetValue(i);
            }
        }

        public object this[string name] {
            get {
                return GetValue(GetOrdinal(name));
            }
        }

        public int  Depth                       { get { return 0;                               } }
        public int  FieldCount                  { get { return dataTypes.Length;                } }
        public bool IsClosed                    { get { return closed;                          } }
        public int  RecordsAffected             { get { return 0;                               } }

        // Closing releases the underlying row iterator; both calls are safe to repeat.
        public void Close()                     { Dispose();                                    }
        public void Dispose() {
            closed = true;
            hasRow = false;
            rowProvider.Dispose();
        }

        public string   GetDataTypeName(int i)  { return dataTypes[i].Name;                     }
        public Type     GetFieldType(int i)     { return dataTypes[i];                          }

        // These functions get basic data types. Each one is a plain unboxing cast,
        // so it throws InvalidCastException when the column holds a different type.
        public bool     GetBoolean(int i)       { return (bool)     CurrentRow[i];              }
        public byte     GetByte(int i)          { return (byte)     CurrentRow[i];              }
        public char     GetChar(int i)          { return (char)     CurrentRow[i];              }
        public DateTime GetDateTime(int i)      { return (DateTime) CurrentRow[i];              }
        public decimal  GetDecimal(int i)       { return (decimal)  CurrentRow[i];              }
        public double   GetDouble(int i)        { return (double)   CurrentRow[i];              }
        public float    GetFloat(int i)         { return (float)    CurrentRow[i];              }
        public Guid     GetGuid(int i)          { return (Guid)     CurrentRow[i];              }
        public short    GetInt16(int i)         { return (short)    CurrentRow[i];              }
        public int      GetInt32(int i)         { return (int)      CurrentRow[i];              }
        public long     GetInt64(int i)         { return (long)     CurrentRow[i];              }
        public string   GetString(int i)        { return (string)   CurrentRow[i];              }
        public object   GetValue(int i)         { return            CurrentRow[i];              }
        public string   GetName(int i)          { return names[i];                              }

        public bool IsDBNull(int i) {
            object obj = CurrentRow[i];
            return obj == null || obj is DBNull;
        }

        // Looks up a field number given its name (case-insensitive).
        // Throws IndexOutOfRangeException for an unknown name, as the
        // IDataRecord.GetOrdinal contract specifies, instead of returning -1.
        public int GetOrdinal(string name) {
            int ordinal = Array.FindIndex(names, x => x.Equals(name, StringComparison.OrdinalIgnoreCase));
            if (ordinal < 0) {
                throw new IndexOutOfRangeException("Unknown column name: " + name);
            }
            return ordinal;
        }

        // Populate "values" given the current row of data.
        // Returns the number of values copied (0 when "values" is null).
        public int GetValues(object[] values) {
            if (values == null) {
                return 0;
            }
            object[] row = CurrentRow;
            int len = Math.Min(values.Length, row.Length);
            Array.Copy(row, values, len);
            return len;
        }

        // This reader only supports a single result set.
        public bool NextResult() {
            return false;
        }

        // Move to the next row. Returns false once the rows are exhausted
        // or the reader has been closed.
        public bool Read() {
            hasRow = !closed && rowProvider.MoveNext();
            return hasRow;
        }

        // Don't bother implementing these in any meaningful way.
        public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) {
            throw new NotImplementedException();
        }

        public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) {
            throw new NotImplementedException();
        }

        public IDataReader GetData(int i) {
            throw new NotImplementedException();
        }

        // No schema table is provided; column metadata is only available
        // through FieldCount, GetName and GetFieldType.
        public DataTable GetSchemaTable() {
            return null;
        }

    }