我有一个非常棒的SqlDataReader包装器,我可以在其中将输出映射到强类型列表。
我现在发现的是,对于列数较多的较大数据集,如果我可以优化我的映射,性能可能会好一些。
考虑到这一点,我特别关注其中一个部分,因为它似乎是开销最大的地方。
我真正想知道的是,有没有办法让这个循环异步执行?我觉得这会带来天壤之别 :)
以下是完整的 Map 方法,也许有人能看出我还可以在哪些地方进一步改进...
IList<T> Map<T>
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data.Common;
using System.Data.SqlClient;
using System.Linq;
using System.Reflection;
using System.Threading.Tasks;
namespace o7th.Class.Library.Data
{
/// <summary>
/// Experimental wrapper that runs a query through <c>Access</c> and maps every row of the
/// resulting <see cref="SqlDataReader"/> onto a new instance of a caller-supplied type,
/// matching public property names to column names.
/// </summary>
public class WrapperTest
{
    /// <summary>
    /// Error text accumulated by <see cref="GetResults{T}"/> whenever a query fails.
    /// </summary>
    public static string Message { set { _Msg = value; } get { return _Msg; } }
    private static string _Msg;

    // Cache consulted and populated by GetResults when the caller asks for caching.
    internal static Common.CustomCache _Cache = new Common.CustomCache();

    /// <summary>
    /// Maps every row of <paramref name="dr"/> to a new <typeparamref name="T"/>.
    /// Rows are split into batches of 10000 and the batches are mapped in parallel.
    /// </summary>
    /// <typeparam name="T">Target type; each public property is filled from the column of the same name.</typeparam>
    /// <param name="dr">An open reader; it is enumerated to the end.</param>
    /// <returns>
    /// A materialized list of mapped objects. Batches are collected in a
    /// <see cref="ConcurrentBag{T}"/>, so the order of the result may differ from row order.
    /// </returns>
    private static IEnumerable<T> Map<T>(SqlDataReader dr) where T : new()
    {
        var records = dr.Cast<DbDataRecord>();

        // typeof(T) yields the same properties as new T().GetType() without
        // constructing a throwaway instance.
        PropertyInfo[] propertyInfo = typeof(T).GetProperties();

        var resultCollection = new ConcurrentBag<List<T>>();
        Parallel.ForEach(records.Batch(10000), batch => resultCollection.Add(MapThis<T>(propertyInfo, batch)));

        // Materialize once so callers (and the cache) hold a stable list rather than a
        // lazy query that re-walks the bag on every enumeration.
        return resultCollection.SelectMany(m => m).ToList();
    }

    /// <summary>
    /// Maps one batch of records to a list of <typeparamref name="T"/>.
    /// </summary>
    /// <param name="propertyInfo">Properties of <typeparamref name="T"/> to populate.</param>
    /// <param name="batch">Records to map; each must expose a column for every property name.</param>
    /// <returns>One mapped object per record in the batch.</returns>
    private static List<T> MapThis<T>(PropertyInfo[] propertyInfo, IEnumerable<DbDataRecord> batch) where T : new()
    {
        // List<T>.Add is not thread-safe, so the objects are projected with Select and
        // gathered by PLINQ's ToList instead of being added to a shared list from ForAll.
        return batch.AsParallel().Select(record =>
        {
            T obj = new T();
            foreach (PropertyInfo prop in propertyInfo)
            {
                var dbVal = record[prop.Name];
                // DBNull leaves the property at its default value.
                if (!Equals(dbVal, DBNull.Value))
                {
                    prop.SetValue(obj, dbVal, null);
                }
            }
            return obj;
        }).ToList();
    }

    /// <summary>
    /// Executes a query and returns its rows mapped to <typeparamref name="T"/>, optionally
    /// serving from and storing into the cache.
    /// </summary>
    /// <param name="_Qry">Query text (or stored procedure name, depending on <paramref name="_QryType"/>).</param>
    /// <param name="_QryType">How <paramref name="_Qry"/> is interpreted.</param>
    /// <param name="_ParamNames">Parameter names; when null, no parameters are set.</param>
    /// <param name="_ParamVals">Parameter values, passed through as given.</param>
    /// <param name="_ParamDTs">Parameter SQL types, passed through as given.</param>
    /// <param name="_ShouldCache">When true, a cached result is returned if present and a fresh result is cached.</param>
    /// <param name="_CacheID">Cache key used when <paramref name="_ShouldCache"/> is true.</param>
    /// <returns>
    /// The mapped rows; null when the reader is null or has no rows, or when an exception
    /// was caught (in which case <see cref="Message"/> is appended to and the error is logged).
    /// </returns>
    public static IEnumerable<T> GetResults<T>(string _Qry, System.Data.CommandType _QryType,
        string[] _ParamNames = null,
        object[] _ParamVals = null,
        System.Data.SqlDbType[] _ParamDTs = null,
        bool _ShouldCache = false,
        string _CacheID = "") where T : new()
    {
        // Only touch the cache when the caller opted in; a cached hit short-circuits the query.
        if (_ShouldCache)
        {
            IEnumerable<T> _CachedItem = _Cache.Get<IEnumerable<T>>(_CacheID);
            if (_CachedItem != null)
            {
                return _CachedItem;
            }
        }

        // Fire up our data access object
        using (Access db = new Access())
        {
            try
            {
                IEnumerable<T> _Query = null;
                db.QueryType = _QryType;
                db.Query = _Qry;

                // Parameters are optional; only hand them over when names were supplied.
                if (_ParamNames != null)
                {
                    db.ParameterNames = _ParamNames;
                    db.ParameterValues = _ParamVals;
                    db.ParameterDataTypes = _ParamDTs;
                }

                using (SqlDataReader r = db.GetResults())
                {
                    // Map only when a reader came back and it contains rows.
                    if (r != null && r.HasRows)
                    {
                        _Query = Map<T>(r);
                    }
                }

                // A null result is never cached: the lookup above treats null as a miss anyway.
                if (_ShouldCache && _Query != null)
                {
                    _Cache.Set<IEnumerable<T>>(_Query, _CacheID);
                }

                return _Query;
            }
            catch (Exception ex)
            {
                // Record the failure in the returnable message and the logging mechanism,
                // then report "no result" to the caller as null.
                _Msg += "Wrapper.GetResults Exception: " + ex.Message + db.Message;
                ErrorReporting.WriteEm.WriteItem(ex, "o7th.Class.Library.Data.Wrapper.GetResults", _Msg);
                return null;
            }
        }
    }
}
public static class Extensions
{
    /// <summary>
    /// Splits a collection into consecutive batches of at most <paramref name="batchSize"/> items.
    /// </summary>
    /// <typeparam name="T">The element type.</typeparam>
    /// <param name="collection">The collection to split; null yields an empty sequence.</param>
    /// <param name="batchSize">The maximum size of each batch; must be greater than zero.</param>
    /// <returns>
    /// A lazily produced sequence of batches. Every batch is full except possibly the last;
    /// no empty batch is produced.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="batchSize"/> is zero or negative.
    /// </exception>
    public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> collection, int batchSize)
    {
        // Validate eagerly: inside an iterator the check would not run until the first
        // MoveNext, and a size of 0 would silently produce one unbounded batch.
        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException("batchSize", batchSize, "Batch size must be greater than zero.");
        }

        return BatchIterator(collection, batchSize);
    }

    // Lazily walks the collection, handing out a fresh list each time one fills up.
    private static IEnumerable<IEnumerable<T>> BatchIterator<T>(IEnumerable<T> collection, int batchSize)
    {
        if (collection == null)
        {
            yield break;
        }

        var nextbatch = new List<T>(batchSize);
        foreach (T item in collection)
        {
            nextbatch.Add(item);
            if (nextbatch.Count == batchSize)
            {
                yield return nextbatch;
                nextbatch = new List<T>(batchSize);
            }
        }

        // Emit the trailing partial batch, if any.
        if (nextbatch.Count > 0)
        {
            yield return nextbatch;
        }
    }
}
}
db.GetResults() 使用 SqlClient.SqlDataReader,只是一次简单的 ExecuteReader 调用。
P.S. 这是我的第一个 C# 项目。我长期以来一直是 BASIC / QBasic / VB 程序员 =)
这是我的Test ConsoleApp:
using o7th.Class.Library.Data;
using System;
using System.Collections.Generic;
using System.Threading;
namespace Testing
{
class Program
{
    /// <summary>
    /// Times the same stored-procedure call through <c>Wrapper</c> and <c>WrapperTest</c>
    /// and prints the elapsed time and record count of each run.
    /// </summary>
    static void Main(string[] args)
    {
        // Stopwatch measures elapsed time directly; DateTime.Now ticks are coarse and
        // follow wall-clock adjustments.
        var timer = System.Diagnostics.Stopwatch.StartNew();
        IList<Typing> _T = Wrapper.GetResults<Typing>("List.ZipSearch",
            System.Data.CommandType.StoredProcedure,
            new string[] { "@ZipCode", "@RadiusMile" },
            new object[] { "01020", 10000 },
            new System.Data.SqlDbType[] { System.Data.SqlDbType.VarChar, System.Data.SqlDbType.Float },
            true, "TestCache1");
        timer.Stop();
        // NOTE(review): Wrapper is not shown here; a null result is reported as zero records.
        int count = _T == null ? 0 : _T.Count;
        Console.WriteLine("Task Took: " + timer.Elapsed + " for: " + count + " records.");

        Thread.Sleep(2000);

        var timer2 = System.Diagnostics.Stopwatch.StartNew();
        IEnumerable<Typing> _T2 = WrapperTest.GetResults<Typing>("List.ZipSearch",
            System.Data.CommandType.StoredProcedure,
            new string[] { "@ZipCode", "@RadiusMile" },
            new object[] { "01020", 10000 },
            new System.Data.SqlDbType[] { System.Data.SqlDbType.VarChar, System.Data.SqlDbType.Float },
            true, "TestCache2");
        timer2.Stop();
        // Print the number of records, not the enumerable itself (which would print its
        // type name). WrapperTest.GetResults returns null on failure or when there are no rows.
        int count2 = _T2 == null ? 0 : System.Linq.Enumerable.Count(_T2);
        Console.WriteLine("Task Took: " + timer2.Elapsed + " for: " + count2 + " records.");

        Console.WriteLine("");
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    // Row shape for the List.ZipSearch result; property names must match the column names.
    partial class Typing
    {
        public long ZipID { get; set; }
        public string ZipCode { get; set; }
        public string City { get; set; }
        public string State { get; set; }
        public string County { get; set; }
        public double Mileage { get; set; }
    }
}
}
答案 0 :(得分:0)
如果是我来用这段代码,我会做一个小改动:把 if 改成仅在需要时才通过 PropertyInfo 赋值(newObject 本来就已经是 default(T) 了):
// Assign only when the property was found, is writable, and the column value is not DBNull;
// otherwise newObject keeps whatever value the property already has.
// (info, _Rdr, i and newObject come from the enclosing code, which is not shown here.)
if ((info != null) && info.CanWrite && !(_Rdr.GetValue(i) is DBNull))
{
info.SetValue(newObject, _Rdr.GetValue(i), null);
// Leave the enclosing loop once the value has been assigned.
// NOTE(review): presumably at most one iteration matches — confirm against the enclosing loop.
break;
}
这样可以省去一次额外的 default(T) 调用,也避免用 newObject 自身的默认值再去覆盖它。这只是一个很小的优化。另外,我看到 newObject 会被多次覆盖,而这个 if 似乎只会有一次为真,所以我加了一个 break 来省掉多余的枚举;对于大型数据集,这也能节省一些时间。
这个怎么样?
// Read the column value once and reuse it for both the DBNull test and the assignment.
// (info, _Rdr, i and newObject come from the enclosing code, which is not shown here.)
var readerValue = _Rdr.GetValue(i);
if ((info != null) && info.CanWrite && !(readerValue is DBNull))
{
info.SetValue(newObject, readerValue, null);
// Leave the enclosing loop once the value has been assigned.
// NOTE(review): presumably at most one iteration matches — confirm against the enclosing loop.
break;
}
*编辑以添加更多代码。
不确定这是否会改善事情:
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Linq;
namespace ConsoleApplication1
{
internal class Program
{
    private static readonly SqlToObjectReflectionMappingService MappingService = new SqlToObjectReflectionMappingService();

    private static void Main(string[] args)
    {
        // Call ConvertTable here...
    }

    /// <summary>
    /// Maps every row of <paramref name="dataTable"/> to a new <typeparamref name="T"/>
    /// using the shared mapping service.
    /// </summary>
    private static IEnumerable<T> ConvertTable<T>(DataTable dataTable) where T : new()
    {
        return MappingService.DataTableToObjects<T>(dataTable);
    }

    /// <summary>
    /// Maps <see cref="DataRow"/> values onto object properties by matching property
    /// names to column names through <see cref="TypeDescriptor"/>.
    /// </summary>
    public class SqlToObjectReflectionMappingService : ISqlToObjectMappingService
    {
        /// <summary>
        /// Creates a <typeparamref name="T"/> and fills its properties from <paramref name="row"/>.
        /// </summary>
        /// <param name="row">The row supplying the values.</param>
        /// <param name="propertyDescriptorCollection">The properties of <typeparamref name="T"/> to populate.</param>
        /// <returns>
        /// The populated object. Read-only properties, properties with no matching column,
        /// and columns holding a database null are left at the object's default value.
        /// </returns>
        public T DataRowToObject<T>(DataRow row, PropertyDescriptorCollection propertyDescriptorCollection)
            where T : new()
        {
            var obj = new T();
            DataColumnCollection columns = row.Table.Columns;
            foreach (PropertyDescriptor propertyDescriptor in propertyDescriptorCollection)
            {
                string name = propertyDescriptor.Name;

                // Setting a read-only property or indexing a missing column would throw.
                if (propertyDescriptor.IsReadOnly || !columns.Contains(name))
                {
                    continue;
                }

                // DBNull.Value cannot be assigned to a typed property; keep the default instead.
                if (row.IsNull(name))
                {
                    continue;
                }

                propertyDescriptor.SetValue(obj, row[name]);
            }
            return obj;
        }

        /// <summary>
        /// Maps every row of <paramref name="table"/> to a new <typeparamref name="T"/> in parallel.
        /// </summary>
        /// <param name="table">The table whose rows are mapped.</param>
        /// <returns>
        /// A deferred parallel query; the rows are mapped when it is enumerated, and the
        /// results are produced in the table's row order.
        /// </returns>
        public IEnumerable<T> DataTableToObjects<T>(DataTable table) where T : new()
        {
            var obj = new T();
            var props = TypeDescriptor.GetProperties(obj);
            // AsOrdered keeps the output aligned with the table's row order (e.g. an ORDER BY).
            return table.AsEnumerable().AsParallel().AsOrdered().Select(m => DataRowToObject<T>(m, props));
        }
    }

    /// <summary>
    /// Contract for services that turn rows of a <see cref="DataTable"/> into objects.
    /// </summary>
    public interface ISqlToObjectMappingService
    {
        T DataRowToObject<T>(DataRow row, PropertyDescriptorCollection propertyDescriptorCollection) where T : new();
        IEnumerable<T> DataTableToObjects<T>(DataTable table) where T : new();
    }
}
}
*编辑以添加更多代码。
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data.Common;
using System.Data.SqlClient;
using System.Linq;
using System.Reflection;
using System.Threading.Tasks;
namespace ConsoleApplication1
{
internal class Program
{
    private static void Main(string[] args)
    {
        // Call ConvertTable here
    }

    /// <summary>
    /// Maps every row of <paramref name="dr"/> to a new <typeparamref name="T"/>.
    /// Rows are split into batches of 10000 and the batches are mapped in parallel.
    /// </summary>
    /// <typeparam name="T">Target type; each public property is filled from the column of the same name.</typeparam>
    /// <param name="dr">An open reader; it is enumerated to the end.</param>
    /// <returns>
    /// A materialized list of mapped objects. Batches are collected in a
    /// <see cref="ConcurrentBag{T}"/>, so the order of the result may differ from row order.
    /// </returns>
    private static IEnumerable<T> Map<T>(SqlDataReader dr) where T : new()
    {
        var records = dr.Cast<DbDataRecord>();

        // typeof(T) yields the same properties as new T().GetType() without
        // constructing a throwaway instance.
        PropertyInfo[] propertyInfo = typeof(T).GetProperties();

        var resultCollection = new ConcurrentBag<List<T>>();
        Parallel.ForEach(records.Batch(10000), batch => resultCollection.Add(MapThis<T>(propertyInfo, batch)));

        // Materialize once so callers hold a stable list rather than a lazy query that
        // re-walks the bag on every enumeration.
        return resultCollection.SelectMany(m => m).ToList();
    }

    /// <summary>
    /// Maps one batch of records to a list of <typeparamref name="T"/>.
    /// </summary>
    /// <param name="propertyInfo">Properties of <typeparamref name="T"/> to populate.</param>
    /// <param name="batch">Records to map; each must expose a column for every property name.</param>
    /// <returns>One mapped object per record in the batch.</returns>
    private static List<T> MapThis<T>(PropertyInfo[] propertyInfo, IEnumerable<DbDataRecord> batch) where T : new()
    {
        // List<T>.Add is not thread-safe, so the objects are projected with Select and
        // gathered by PLINQ's ToList instead of being added to a shared list from ForAll.
        return batch.AsParallel().Select(record =>
        {
            T obj = new T();
            foreach (PropertyInfo prop in propertyInfo)
            {
                var dbVal = record[prop.Name];
                // DBNull leaves the property at its default value.
                if (!Equals(dbVal, DBNull.Value))
                {
                    prop.SetValue(obj, dbVal, null);
                }
            }
            return obj;
        }).ToList();
    }
}
public static class Extensions
{
    /// <summary>
    /// Splits a collection into consecutive batches of at most <paramref name="batchSize"/> items.
    /// </summary>
    /// <typeparam name="T">The element type.</typeparam>
    /// <param name="collection">The collection to split; null yields an empty sequence.</param>
    /// <param name="batchSize">The maximum size of each batch; must be greater than zero.</param>
    /// <returns>
    /// A lazily produced sequence of batches. Every batch is full except possibly the last;
    /// no empty batch is produced.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="batchSize"/> is zero or negative.
    /// </exception>
    public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> collection, int batchSize)
    {
        // Validate eagerly: inside an iterator the check would not run until the first
        // MoveNext, and a size of 0 would silently produce one unbounded batch.
        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException("batchSize", batchSize, "Batch size must be greater than zero.");
        }

        return BatchIterator(collection, batchSize);
    }

    // Lazily walks the collection, handing out a fresh list each time one fills up.
    private static IEnumerable<IEnumerable<T>> BatchIterator<T>(IEnumerable<T> collection, int batchSize)
    {
        if (collection == null)
        {
            yield break;
        }

        var nextbatch = new List<T>(batchSize);
        foreach (T item in collection)
        {
            nextbatch.Add(item);
            if (nextbatch.Count == batchSize)
            {
                yield return nextbatch;
                nextbatch = new List<T>(batchSize);
            }
        }

        // Emit the trailing partial batch, if any.
        if (nextbatch.Count > 0)
        {
            yield return nextbatch;
        }
    }
}
}
答案 1 :(得分:0)
你有没有意识到,每次调用 String.ToUpper() 都会创建一个新的字符串,而它用完就被丢弃?而且每条记录都要这样做一次?
我猜你用的是 HashTable,换成下面这样可能会更好:
// Case-insensitive (ordinal) map from name to PropertyInfo, so keys can be looked up
// without first upper-casing them.
_ht = new Dictionary<string, PropertyInfo>(StringComparer.OrdinalIgnoreCase);
然后你可以像这样使用它:
// Look up the property for column i by the reader's column name.
// NOTE(review): if _ht is a Dictionary, the indexer throws KeyNotFoundException for a
// column with no matching entry — confirm every column has one, or use TryGetValue.
PropertyInfo info = _ht[_Rdr.GetName(i)];
如果想要并行化,可以看看 Parallel.For 或 Parallel.ForEach。
但所有这些都无法避免大量使用反射。
但我真的认为你应该做的是构建一个mapper(并且可能会缓存它)。
如果你不想走生成 IL(Emit)的路线,可以考虑使用表达式树: