我正在尝试根据用户汇总大约200万行。 一个用户有多个事务,每个事务都有一个Platform和一个TransactionType。我将Platform和TransactionType列汇总为json并保存为一行。
但是我的代码很慢。 如何提高性能?
/// <summary>
/// Builds one TransactionByDay row per distinct user found in db.Transactions.
/// Each row stores how many of the user's transactions fall on each Platform name
/// and on each TransactionType name.
/// </summary>
/// <remarks>
/// This is the version the question describes as slow: it runs one transaction
/// query per user and calls SaveChanges once per user.
/// </remarks>
public static void AggregateTransactions()
{
using (var db = new ApplicationDbContext())
{
db.ChangeTracker.AutoDetectChangesEnabled = false;
//Get a list of users who have transactions
var users = db.Transactions
.Select(x => x.User)
.Distinct();
// users.ToList() materializes the distinct users before the loop body issues further queries on db
foreach (var user in users.ToList())
{
//Get all transactions for a particular user
// A separate query (with both Include calls) is executed for every user in the list
var _transactions = db.Transactions
.Include(x => x.Platform)
.Include(x => x.TransactionType)
.Where(x => x.User == user)
.ToList();
//Aggregate Platforms from all transactions for user
// _transactions is already a list, so this grouping runs in memory: key = Platform.Name, value = count
Dictionary<string, int> platforms = new Dictionary<string, int>();
foreach (var item in _transactions.Select(x => x.Platform).GroupBy(x => x.Name).ToList())
{
platforms.Add(item.Key, item.Count());
};
//Aggregate TransactionTypes from all transactions for user
// Same in-memory grouping as above: key = TransactionType.Name, value = count
Dictionary<string, int> transactionTypes = new Dictionary<string, int>();
foreach (var item in _transactions.Select(x => x.TransactionType).GroupBy(x => x.Name).ToList())
{
transactionTypes.Add(item.Key, item.Count());
};
db.Add<TransactionByDay>(new TransactionByDay
{
User = user,
Platforms = platforms, //The dictionary list is represented as json in table
TransactionTypes = transactionTypes //The dictionary list is represented as json in table
});
// SaveChanges is called inside the loop, i.e. once per user
db.SaveChanges();
}
}
}
更新
因此,数据的基本视图如下所示:
交易数据:
Id:b11c6b67-6c74-4bbe-f712-08d609af20cf, 用户名:1, PlatformId:3, TransactionTypeId:1
Id:4782803f-2f6b-4d99-f717-08d609af20cf, 用户名:1, PlatformId:3, TransactionTypeId:4
将数据汇总为TransactionPerDay:
Id:9df41ef2-2fc8-441b-4a2f-08d609e21559, 用户名:1, 平台:{"p3":2}, TransactionTypes:{"t1":1,"t4":1}
因此,在这种情况下,两个事务合计为一个。您会看到平台和事务类型将汇总为json。
答案 0 :(得分:3)
您可能不应该在循环内调用db.SaveChanges()。将其移到循环之外、一次性保存所有更改,可能会有所帮助。
但是,话虽如此,当处理大量数据和性能是关键时,我发现ADO.NET可能是更好的选择。这并不意味着您必须停止使用实体框架,但是对于这种方法,您可以使用ADO.NET。如果您走这条路,则可以:
创建一个存储过程以返回需要处理的数据,填充数据表,操作数据并使用sqlBulkCopy批量持久存储所有内容。
使用存储过程来完全执行此操作。这样避免了将数据传送到应用程序的麻烦,并且整个处理过程都可以在数据库内部进行。
答案 1 :(得分:2)
Linq To EF并不是为了提高速度而构建的(恕我直言,LinqToSQL更加简便快捷;或者您也可以通过Linq to EF/SQL直接运行SQL命令)。无论如何,我不确定下面这种写法的速度如何:
// Groups all transactions by user in one LINQ query, then builds the
// TransactionByDay objects (with per-name counts) in memory.
using (var db = new MyContext(connectionstring))
{
var tbd = (from t in db.Transactions
group t by t.User
into g
// Sub-group each user's transactions by platform name and by transaction type name.
// NOTE(review): whether these nested GroupBy calls are translated to SQL or evaluated
// on the client depends on the EF version in use — confirm before relying on the speed.
let platforms = g.GroupBy(tt => tt.Platform.Name)
let trantypes = g.GroupBy(tt => tt.TransactionType.Name)
select new {
User = g.Key,
Platforms = platforms,
TransactionTypes = trantypes
}).ToList()
// ToList() above materializes the query; the projection below runs over the in-memory list.
.Select(u => new TransactionByDay {
User=u.User,
Platforms=u.Platforms.ToDictionary(tt => tt.Key, tt => tt.Count()),
TransactionTypes = u.TransactionTypes.ToDictionary(tt => tt.Key, tt => tt.Count())
});
// tbd has no trailing ToList(), so the TransactionByDay objects are created when tbd is enumerated.
//...
}
答案 2 :(得分:1)
思路是尽量减少查询和Include的次数,先一次性取出所需的尽可能多的数据。因此,无需在每个事务上都Include Platform和TransactionType,只需将它们各查询一次并放入Dictionary中用于查找即可。此外,我们可以并行处理,然后一次性保存所有数据。
/// <summary>
/// Aggregates every user's transactions into a single TransactionByDay row that holds
/// the number of transactions per platform name and per transaction type name.
/// </summary>
/// <remarks>
/// Platforms and transaction types are loaded once into lookup dictionaries instead of
/// being joined onto every transaction with Include. The per-user aggregation runs in
/// parallel on in-memory data only, and all rows are saved with one SaveChanges call.
/// Requires the System.Collections.Concurrent, System.Linq and System.Threading.Tasks namespaces.
/// </remarks>
public static void AggregateTransactions()
{
    using (var db = new ApplicationDbContext())
    {
        db.ChangeTracker.AutoDetectChangesEnabled = false;

        // Load all transactions, grouped by user, in one go.
        // NOTE(review): not sure every EF Core version supports this kind of grouping — confirm.
        var transactionsByUser = db.Transactions
            .GroupBy(x => x.User)
            .ToList();

        // Lookup tables, queried once. They are only read inside the parallel loop.
        var platformsById = db.Platforms.ToDictionary(ks => ks.PlatformId);
        var transactionTypesById = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);

        var bag = new ConcurrentBag<TransactionByDay>();

        // The loop body touches only in-memory data (the groups and the two lookups),
        // never the DbContext, so it can safely run on several threads.
        Parallel.ForEach(transactionsByUser, userTransactions =>
        {
            //Aggregate Platforms from all transactions for user
            Dictionary<string, int> platformCounts = userTransactions
                .Select(x => platformsById[x.PlatformId])
                .GroupBy(x => x.Name)
                .ToDictionary(g => g.Key, g => g.Count());

            //Aggregate TransactionTypes from all transactions for user
            Dictionary<string, int> transactionTypeCounts = userTransactions
                .Select(x => transactionTypesById[x.TransactionTypeId])
                .GroupBy(x => x.Name)
                .ToDictionary(g => g.Key, g => g.Count());

            bag.Add(new TransactionByDay
            {
                User = userTransactions.Key,
                Platforms = platformCounts, //The dictionary list is represented as json in table
                TransactionTypes = transactionTypeCounts //The dictionary list is represented as json in table
            });
        });

        // Parallel.ForEach returns only after every iteration has finished, so the bag is
        // complete here. Switch back to a regular foreach if the parallelism shows no benefit.
        db.AddRange(bag);
        db.SaveChanges();
    }
}
变体 #2
/// <summary>
/// Aggregates every user's transactions into a single TransactionByDay row that holds
/// the number of transactions per platform name and per transaction type name.
/// </summary>
/// <remarks>
/// Users are processed in parallel. A DbContext instance must not be used from several
/// threads at once, so each worker creates its own ApplicationDbContext for reading
/// transactions. The outer context is used only before and after the parallel loop.
/// Requires the System.Collections.Concurrent, System.Linq and System.Threading.Tasks namespaces.
/// </remarks>
public static void AggregateTransactions()
{
    using (var db = new ApplicationDbContext())
    {
        db.ChangeTracker.AutoDetectChangesEnabled = false;

        //Get a list of users who have transactions
        var users = db.Transactions
            .Select(x => x.User)
            .Distinct()
            .ToList();

        // Lookup tables, queried once. They are only read inside the parallel loop.
        var platforms = db.Platforms.ToDictionary(ks => ks.PlatformId);
        var transactionTypes = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);

        var bag = new ConcurrentBag<TransactionByDay>();

        // Adds one occurrence of key to counts.
        void Increment(Dictionary<string, int> counts, string key)
        {
            counts[key] = counts.TryGetValue(key, out var current) ? current + 1 : 1;
        }

        Parallel.ForEach(
            users,
            // One context per worker, created when the worker starts.
            () => new ApplicationDbContext(),
            (user, loopState, workerDb) =>
            {
                // NOTE(review): user was loaded through the outer context and is used here only
                // as a filter value on the worker's context — confirm this matches your model.
                var userTransactionRows = workerDb.Transactions
                    .Where(x => x.User == user)
                    .ToList();

                //Aggregate Platforms and TransactionTypes from all transactions for user
                var userPlatforms = new Dictionary<string, int>();
                var userTransactions = new Dictionary<string, int>();

                foreach (var transaction in userTransactionRows)
                {
                    // Transactions whose platform or type id is missing from the lookup are not counted.
                    if (platforms.TryGetValue(transaction.PlatformId, out var platform))
                    {
                        Increment(userPlatforms, platform.Name);
                    }

                    if (transactionTypes.TryGetValue(transaction.TransactionTypeId, out var type))
                    {
                        Increment(userTransactions, type.Name);
                    }
                }

                bag.Add(new TransactionByDay
                {
                    User = user,
                    Platforms = userPlatforms, //The dictionary list is represented as json in table
                    TransactionTypes = userTransactions //The dictionary list is represented as json in table
                });

                return workerDb;
            },
            // Dispose each worker's context once that worker has finished.
            workerDb => workerDb.Dispose());

        // Parallel.ForEach has returned, so every user has been processed; save everything at once.
        db.AddRange(bag);
        db.SaveChanges();
    }
}