如何使用EF Core汇总数百万行

时间:2018-08-24 17:11:49

标签: c# sql-server entity-framework .net-core

我正在尝试根据用户汇总大约200万行。 一个用户有多个事务,每个事务都有一个Platform和一个TransactionType。我将Platform和TransactionType列汇总为json并保存为一行。

但是我的代码很慢。 如何提高性能?

  public static void AggregateTransactions()
        {
            // Builds one TransactionByDay row per user. Each row holds two
            // name -> count dictionaries: one over the platforms and one over the
            // transaction types of that user's transactions.
            using (var db = new ApplicationDbContext())
            {
                db.ChangeTracker.AutoDetectChangesEnabled = false;

                // Every distinct user that owns at least one transaction.
                var usersWithTransactions = db.Transactions
                    .Select(t => t.User)
                    .Distinct()
                    .ToList();

                foreach (var user in usersWithTransactions)
                {
                    // One query per user: load that user's transactions together
                    // with the related Platform and TransactionType entities.
                    var userTransactions = db.Transactions
                        .Include(t => t.Platform)
                        .Include(t => t.TransactionType)
                        .Where(t => t.User == user)
                        .ToList();

                    // Platform name -> number of this user's transactions on it.
                    Dictionary<string, int> platformCounts = userTransactions
                        .Select(t => t.Platform)
                        .GroupBy(p => p.Name)
                        .ToDictionary(g => g.Key, g => g.Count());

                    // Transaction type name -> number of this user's transactions of that type.
                    Dictionary<string, int> typeCounts = userTransactions
                        .Select(t => t.TransactionType)
                        .GroupBy(tt => tt.Name)
                        .ToDictionary(g => g.Key, g => g.Count());

                    var summary = new TransactionByDay
                    {
                        User = user,
                        Platforms = platformCounts,     // The dictionary is represented as json in the table
                        TransactionTypes = typeCounts   // The dictionary is represented as json in the table
                    };

                    db.Add<TransactionByDay>(summary);

                    // Persisted once per user, inside the loop.
                    db.SaveChanges();
                }
            }
        }

更新

因此,数据的基本视图如下所示:

交易数据:

Id:b11c6b67-6c74-4bbe-f712-08d609af20cf, 用户名:1, PlatformId:3, TransactionTypeId:1

Id:4782803f-2f6b-4d99-f717-08d609af20cf, 用户名:1, PlatformId:3, TransactionTypeId:4

将数据汇总为TransactionByDay:

Id:9df41ef2-2fc8-441b-4a2f-08d609e21559, 用户名:1, 平台:{"p3":2}, TransactionTypes:{"t1":1,"t4":1}

因此,在这种情况下,两个事务合计为一个。您会看到平台和事务类型将汇总为json。

3 个答案:

答案 0 :(得分:3)

您可能不应该在循环内调用db.SaveChanges()。将它移到循环之外,一次性保存所有更改,可能会有所帮助。

但是,话虽如此,当处理大量数据和性能是关键时,我发现ADO.NET可能是更好的选择。这并不意味着您必须停止使用实体框架,但是对于这种方法,您可以使用ADO.NET。如果您走这条路,则可以:

  1. 创建一个存储过程以返回需要处理的数据,填充数据表,操作数据并使用sqlBulkCopy批量持久存储所有内容。

  2. 使用存储过程来完全执行此操作。这样避免了将数据传送到应用程序的麻烦,并且整个处理过程都可以在数据库内部进行。

答案 1 :(得分:2)

Linq To EF并不是为追求速度而设计的(恕我直言,LinqToSQL更简单、更快;或者您也可以通过Linq To EF/SQL直接执行原始SQL命令)。无论如何,我不确定下面这种写法的速度如何:

    // Sketch: compute the per-user aggregation with a single grouped query
    // instead of issuing one query per user.
    using (var db = new MyContext(connectionstring))
    {

        // Group all transactions by user; within each user group, sub-group by
        // platform name and by transaction type name.
        // NOTE(review): whether the nested GroupBy calls are translated to SQL or
        // evaluated on the client depends on the EF version/provider - confirm.
        var tbd = (from t in db.Transactions
                    group t by t.User
                    into g
                    let platforms = g.GroupBy(tt => tt.Platform.Name)
                    let trantypes = g.GroupBy(tt => tt.TransactionType.Name)
                    select new {
                       User = g.Key,
                       Platforms = platforms, 
                       TransactionTypes = trantypes 
                    }).ToList()
                    // ToList() materializes the grouped result; the projection below
                    // then runs in memory and turns each sub-grouping into a
                    // name -> count dictionary.
                    .Select(u => new TransactionByDay {
                        User=u.User, 
                        Platforms=u.Platforms.ToDictionary(tt => tt.Key, tt => tt.Count()),
                        TransactionTypes = u.TransactionTypes.ToDictionary(tt => tt.Key, tt => tt.Count())
                    });
        // tbd is a deferred sequence of TransactionByDay objects; nothing has been
        // added to the context or saved at this point.
 //...
}

答案 2 :(得分:1)

这个思路是尽量减少查询和Include的次数,一开始就把所需的数据尽可能多地取出来。因此,无需在每个事务上都Include Platform和TransactionType,只需各查询一次并放入Dictionary中,之后从中查找即可。此外,我们可以并行处理,然后一次性保存所有数据。

    /// <summary>
    /// Builds one <c>TransactionByDay</c> row per user, holding the number of that
    /// user's transactions per platform name and per transaction type name, and
    /// saves all rows with a single <c>SaveChanges</c> call.
    /// </summary>
    /// <remarks>
    /// Platforms and transaction types are loaded once into dictionaries keyed by
    /// id, so the transactions themselves are read without any Include.
    /// A transaction whose PlatformId or TransactionTypeId is missing from those
    /// dictionaries causes a <c>KeyNotFoundException</c>.
    /// </remarks>
    public static void AggregateTransactions()
    {
        using (var db = new ApplicationDbContext())
        {
            db.ChangeTracker.AutoDetectChangesEnabled = false;

            //Get all transactions, grouped by user
            var transactionsByUser = db.Transactions
               .GroupBy(x => x.User) //Not sure if EF Core supports this kind of grouping
               .ToList();

            // Lookup tables, loaded once. They are only read inside the parallel
            // loop, and are named differently from the per-user result dictionaries
            // so the lambda's locals do not clash with them.
            var platformsById = db.Platforms.ToDictionary(ks => ks.PlatformId);
            var transactionTypesById = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);
            var bag = new ConcurrentBag<TransactionByDay>();

            // The groups are already in memory, so the loop body does pure
            // in-memory work and never touches the DbContext.
            Parallel.ForEach(transactionsByUser, userTransactions =>
            {
                //Aggregate Platforms from all transactions for user.
                //The dictionary is local to this iteration, so a plain Dictionary is sufficient.
                Dictionary<string, int> platforms = userTransactions
                    .Select(x => platformsById[x.PlatformId])
                    .GroupBy(x => x.Name)
                    .ToDictionary(g => g.Key, g => g.Count());

                //Aggregate TransactionTypes from all transactions for user
                Dictionary<string, int> transactionTypes = userTransactions
                    .Select(x => transactionTypesById[x.TransactionTypeId])
                    .GroupBy(x => x.Name)
                    .ToDictionary(g => g.Key, g => g.Count());

                bag.Add(new TransactionByDay
                {
                    User = userTransactions.Key,
                    Platforms = platforms,     //The dictionary is represented as json in table
                    TransactionTypes = transactionTypes     //The dictionary is represented as json in table
                });
            });

            //Parallel.ForEach returns only after every iteration has finished, so the bag is complete here.
            //If parallelism brings no benefit, a regular foreach loop works just as well.
            db.AddRange(bag);
            db.SaveChanges();
        }
    }

变体 #2

    /// <summary>
    /// Builds one <c>TransactionByDay</c> row per user, holding the number of that
    /// user's transactions per platform name and per transaction type name, and
    /// saves all rows with a single <c>SaveChanges</c> call.
    /// </summary>
    /// <remarks>
    /// Users are processed in parallel. Transactions whose PlatformId or
    /// TransactionTypeId is not found in the lookup dictionaries are skipped for
    /// the corresponding count.
    /// </remarks>
    public static void AggregateTransactions()
    {
        using (var db = new ApplicationDbContext())
        {
            db.ChangeTracker.AutoDetectChangesEnabled = false;

            //Get a list of users who have transactions
            var users = db.Transactions
               .Select(x => x.User)
               .Distinct().ToList();

            // Lookup tables, loaded once; they are only read inside the parallel loop.
            var platforms = db.Platforms.ToDictionary(ks => ks.PlatformId);
            var transactionTypes = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);
            var bag = new ConcurrentBag<TransactionByDay>();

            Parallel.ForEach(users, user =>
            {
                // A DbContext instance is not thread-safe, so the outer context must
                // not be queried from several iterations at once. Each iteration
                // reads through its own short-lived context instead.
                List<Transaction> userTransactionRows;
                using (var userDb = new ApplicationDbContext())
                {
                    userTransactionRows = userDb.Transactions
                        .Where(x => x.User == user)
                        .ToList();
                }

                //Per-user counters; local to this iteration, so plain dictionaries are sufficient
                var userPlatforms = new Dictionary<string, int>();
                var userTransactions = new Dictionary<string, int>();

                foreach (var transaction in userTransactionRows)
                {
                    if (platforms.TryGetValue(transaction.PlatformId, out var platform))
                    {
                        // TryGetValue leaves the count at 0 when the name is not present yet.
                        userPlatforms.TryGetValue(platform.Name, out var platformCount);
                        userPlatforms[platform.Name] = platformCount + 1;
                    }

                    if (transactionTypes.TryGetValue(transaction.TransactionTypeId, out var type))
                    {
                        userTransactions.TryGetValue(type.Name, out var typeCount);
                        userTransactions[type.Name] = typeCount + 1;
                    }
                }

                bag.Add(new TransactionByDay
                {
                    User = user,
                    Platforms = userPlatforms,     //The dictionary is represented as json in table
                    TransactionTypes = userTransactions     //The dictionary is represented as json in table
                });

            });

            //Parallel.ForEach has completed all iterations at this point; save everything at once.
            db.AddRange(bag);
            db.SaveChanges();

        }
    }