我的应用程序中有一个由 GUID 组成的字符串列表,另外还有一张包含 100,000 多条记录的表,该表通过 Entity Framework 模型访问。
要找出这张表中 GUID 不在该列表里的所有记录,最高效的方法是什么?
下面的写法执行得非常缓慢:
// Sample list of GUID strings to exclude (placeholder values in the question).
var list= new List<string> { "1", "2", "3" };
// Filters db.Items down to rows whose GUID is not in the list.
// NOTE: the snippet is truncated in the original post (no select clause or closing parenthesis).
return (from t1 in db.Items
where (!list.Contains(t1.GUID))
答案 0 :(得分:1)
当查询需要传入大量参数(几百个或更多)时,我会先把这些参数批量插入一张临时表,然后让主表与这张临时表做连接。
我的代码看起来像这样:
/// <summary>
/// Builds an in-memory table named "Stage" with a single <c>int</c> column
/// named "Key" and adds one row per element of <paramref name="keys"/>,
/// in enumeration order (duplicates are kept).
/// </summary>
/// <param name="keys">The key values to stage.</param>
/// <returns>The populated <see cref="DataTable"/>.</returns>
private static DataTable FillDataTable(IEnumerable<int> keys) {
    var stage = new DataTable("Stage") { Locale = CultureInfo.CurrentCulture };
    stage.Columns.Add("Key", typeof(int));

    foreach (int value in keys) {
        // Rows.Add(params object[]) creates the row, fills column 0 and appends it.
        stage.Rows.Add(value);
    }

    return stage;
}
/// <summary>
/// Creates the staging table on <paramref name="connection"/> and bulk-copies
/// the rows of <paramref name="dataTable"/> into it.
/// </summary>
/// <param name="connection">
/// Connection used for both the CREATE TABLE command and the bulk copy.
/// It is not opened here, so the caller must pass an open connection.
/// </param>
/// <param name="tableName">Name of the staging table to create and fill.</param>
/// <param name="dataTable">Rows to load; expected to have a single int column.</param>
private static void CreateStageTable(SqlConnection connection, string tableName, DataTable dataTable) {
    var sql = new StringBuilder();
    sql.AppendLine("CREATE TABLE {StageTableName} ( ");
    // KEY is a reserved T-SQL keyword; an undelimited column named Key makes
    // the CREATE TABLE statement fail with a syntax error, so bracket it.
    sql.AppendLine(" [Key] INT NOT NULL ");
    sql.AppendLine(") ");
    sql.Replace("{StageTableName}", SqlUtilities.QuoteName(tableName));

    using (var command = connection.CreateCommand()) {
        command.CommandText = sql.ToString();
        command.CommandType = CommandType.Text;
        command.ExecuteNonQuery();
    }

    // No column mappings are configured on the bulk copy below.
    using (var bulkcopy = new SqlBulkCopy(connection)) {
        bulkcopy.DestinationTableName = tableName;
        bulkcopy.WriteToServer(dataTable);
    }
}
/// <summary>
/// Stages <paramref name="keys"/> in a temporary table and builds an
/// anti-join query that selects the rows of <c>tbl</c> whose key is NOT among
/// the staged keys (LEFT JOIN ... WHERE Stage.[Key] IS NULL).
/// </summary>
/// <param name="keys">The key values to exclude from the result.</param>
public void DoQuery(IEnumerable<int> keys) {
    const string stageTableName = "#Stage";
    var dataTable = FillDataTable(keys);

    using (var connection = new SqlConnection(_connectionString)) {
        connection.Open();
        CreateStageTable(connection, stageTableName, dataTable);

        // Fixes relative to the original snippet:
        //  - a missing '+' between two string literals (compile error);
        //  - the {StageTableName} placeholder was never substituted;
        //  - alias x was referenced but never declared on tbl;
        //  - KEY is a reserved T-SQL keyword and must be delimited.
        string sql = ("SELECT x.* " +
                      "FROM tbl AS x " +
                      " LEFT JOIN {StageTableName} AS Stage " +
                      " ON x.[Key] = Stage.[Key] " +
                      "WHERE Stage.[Key] IS NULL")
            .Replace("{StageTableName}", SqlUtilities.QuoteName(stageTableName));

        // ... (the remainder of the method was elided in the original post)
    }
}
答案 1 :(得分:1)
不要使用 List<string>,改用 HashSet<string>,这样查找的时间复杂度是 O(1),而不是 O(n)。