我知道有很多关于它的问题和答案,但我的情况非常特别,因为我没有大型数组相交。我正在实现一个接收搜索对象的搜索方法,并使用linq搜索到数据库(我首先使用的是带有代码的EF 6)。
因此,我在DB中有以下实体,包含以下列:
- 用户:userId(int),name(string)。
- UserFeatures :userId(int),featuresId(short)。
- 功能:featuresId(短),说明(字符串)。
我的搜索对象是这样的:
{ “名字”:“弗里德里希威廉”, “特征”:[1,2,3,4] }
我的linq查询是这样的(想法是让所有用户都与登录用户正在寻找的功能相匹配)。
context.User.Where(u => userToSearch.features.Except(u.UserFeatures.Select(uf => uf.featuresId)))任何()== FALSE).toListAsync();
这很好用,但需要30秒的aprox。我在数据库中有30k用户,但每个用户各有4个和5个功能。
我已经尝试使用hashset,但由于功能集合不大,没有任何区别。问题可能是我有30k用户,每个用户有5个功能。这意味着大约150k次迭代。
你知道我怎么能改善这个吗?在查询中我还有另一个条件。如果我删除具有这些功能的查询,则查询速度会快得多(运行大约4秒。我还实现了Paginations -Take,skip等)。
答案 0 :(得分:2)
我认为使用join
和group by
这样会更快:
int numberOfFeatures = userToSearch.features.Count();
var userIds = from u in context.Users
from uf in u.UserFeatures
where userToSearch.features.Contains(uf.featuresId)
group u by u.userId into g
where g.Count() == numberOfFeatures
select u.Key;
或者,如果您想要用户对象
var users = ...
select u.FirstOrDefault();
答案 1 :(得分:0)
该查询和类似的查询对我来说很好。至于查询表单,除非你的模型中有链接实体,否则下面的两个是唯一想到的。无论如何看看这个repro,看看它对你的行为是否有所不同。
using System;
using System.Collections.Generic;
using System.Data.Entity;
using System.Data.SqlClient;
using System.Linq;
namespace ConsoleApp8
{
public class User
{
public int UserId { get; set; }
public string Name { get; set; }
public virtual ICollection<Feature> Feaures { get; } = new HashSet<Feature>();
}
public class Feature
{
public int FeatureId { get; set; }
public string Description { get; set; }
public virtual ICollection<User> Users { get; } = new HashSet<User>();
}
class Db : DbContext
{
public DbSet<User> Users { get; set; }
public DbSet<Feature> Features { get; set; }
protected override void OnModelCreating(DbModelBuilder modelBuilder)
{
}
}
class Program
{
static void Main(string[] args)
{
if (true)
{
Database.SetInitializer(new DropCreateDatabaseAlways<Db>());
using (var db = new Db())
{
var features = Enumerable.Range(1, 20).Select(i => new Feature() { Description = $"Feature{i}" }).ToList();
var users = Enumerable.Range(1, 30000).Select(i => new User() { Name = $"User{i}" }).ToList();
var rand = new Random();
foreach (var u in users)
{
var featureCount = rand.Next(4, 5);
for (int i = 0; i < featureCount; i++)
{
u.Feaures.Add(features[rand.Next(0, features.Count - 1)]);
}
}
db.Users.AddRange(users);
db.Features.AddRange(features);
db.SaveChanges();
}
}
List<int> requestedFeatureIds;
using (var db = new Db())
{
db.Database.Log = m => Console.WriteLine(m);
var user = db.Users.Where(u => u.Feaures.Count() == 4).AsEnumerable().Last() ;
requestedFeatureIds = user.Feaures.Select(f => f.FeatureId).ToList();
}
using (var db = new Db())
{
db.Database.Log = m => Console.WriteLine(m);
//context.User.Where(u => userToSearch.features.Except(u.UserFeatures.Select(uf => uf.featuresId))).Any()==false).toListAsync();
var q = db.Users.Where(u => requestedFeatureIds.Except(u.Feaures.Select(uf => uf.FeatureId)).Any() == false);
var results = q.ToList();
var q2 = from u in db.Users
where requestedFeatureIds.Intersect(u.Feaures.Select(f => f.FeatureId)).Count() == requestedFeatureIds.Count
select u;
var results2 = q2.ToList();
}
Console.WriteLine("Hit any key to exit");
Console.ReadKey();
}
}
}
输出(相关部分)
Opened connection at 9/7/2017 11:32:12 AM -05:00
SELECT
[Extent1].[UserId] AS [UserId],
[Extent1].[Name] AS [Name]
FROM [dbo].[Users] AS [Extent1]
WHERE 0 = (CASE WHEN ( EXISTS (SELECT
1 AS [C1]
FROM (SELECT
[UnionAll3].[C1] AS [C1]
FROM (SELECT
3 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable1]
UNION ALL
SELECT
7 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable2]
UNION ALL
SELECT
9 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable3]
UNION ALL
SELECT
17 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable4]) AS [UnionAll3]
EXCEPT
SELECT
[Extent2].[Feature_FeatureId] AS [Feature_FeatureId]
FROM [dbo].[UserFeatures] AS [Extent2]
WHERE [Extent1].[UserId] = [Extent2].[User_UserId]) AS [Except1]
)) THEN cast(1 as bit) ELSE cast(0 as bit) END)
-- Executing at 9/7/2017 11:32:12 AM -05:00
-- Completed in 94 ms with result: SqlDataReader
Closed connection at 9/7/2017 11:32:12 AM -05:00
Opened connection at 9/7/2017 11:32:12 AM -05:00
SELECT
[Project7].[UserId] AS [UserId],
[Project7].[Name] AS [Name]
FROM ( SELECT
[Extent1].[UserId] AS [UserId],
[Extent1].[Name] AS [Name],
(SELECT
COUNT(1) AS [A1]
FROM (SELECT
[UnionAll3].[C1] AS [C1]
FROM (SELECT
3 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable1]
UNION ALL
SELECT
7 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable2]
UNION ALL
SELECT
9 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable3]
UNION ALL
SELECT
17 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable4]) AS [UnionAll3]
INTERSECT
SELECT
[Extent2].[Feature_FeatureId] AS [Feature_FeatureId]
FROM [dbo].[UserFeatures] AS [Extent2]
WHERE [Extent1].[UserId] = [Extent2].[User_UserId]) AS [Intersect1]) AS [C1]
FROM [dbo].[Users] AS [Extent1]
) AS [Project7]
WHERE [Project7].[C1] = @p__linq__0
-- p__linq__0: '4' (Type = Int32, IsNullable = false)
-- Executing at 9/7/2017 11:32:12 AM -05:00
-- Completed in 206 ms with result: SqlDataReader
Closed connection at 9/7/2017 11:32:13 AM -05:00
Hit any key to exit