LEFT JOIN导致重复结果

时间:2017-04-14 02:23:57

标签: sql sql-server tsql

我在这里整理了一个问题的简化版本。

方案

  • 我的应用程序包含用户,文件和文件夹。
  • 用户可以创建只有他可以查看的私人文件和所有用户都可以看到的共享文件。
  • 用户可以创建可以将其私人和共享文件组织到的私人文件夹。但是文件夹分配是可选的。如果用户没有分配文件夹,则文件显示在"未分类的"箱中。

模式

-- -------------------------------------
-- User
-- -------------------------------------

CREATE TABLE [User] (
  [Id] VARCHAR(50) NOT NULL
);

INSERT INTO [User]
  VALUES ('user_1');
INSERT INTO [User]
  VALUES ('user_2');

-- -------------------------------------
-- Folder
-- -------------------------------------

CREATE TABLE [Folder] (
    [Id] VARCHAR(50) NOT NULL,
    [UserId] VARCHAR(50) NOT NULL
);

-- Each user has a private folder
INSERT INTO [Folder]
  VALUES ('user1_folder', 'user_1');
INSERT INTO [Folder]
  VALUES ('user2_folder', 'user_2');

-- -------------------------------------
-- File
-- -------------------------------------

CREATE TABLE [File] (
  [Id] VARCHAR(50) NOT NULL,
  [UserId] VARCHAR(50) NULL
);

-- Private files
INSERT INTO [File]
  VALUES ('user1_file1', 'user_1');
INSERT INTO [File]
  VALUES ('user1_file2', 'user_1');

INSERT INTO [File]
  VALUES ('user2_file1', 'user_2');
INSERT INTO [File]
  VALUES ('user2_file2', 'user_2');

-- Shared files
INSERT INTO [File]
  VALUES ('shared_file1', NULL);
INSERT INTO [File]
  VALUES ('shared_file2', NULL);
INSERT INTO [File]
  VALUES ('shared_file3', NULL);
-- UPDATE: new case
INSERT INTO [File]
  VALUES ('shared_file4', NULL);

-- -------------------------------------
-- FolderFile Association
-- -------------------------------------

CREATE TABLE [FolderFile] (
    [FolderId] VARCHAR(50) NOT NULL,
    [FileId] VARCHAR(50) NOT NULL
);

-- User 1 puts some files in his private folders
INSERT INTO [FolderFile]
  VALUES ('user1_folder', 'user1_file');
INSERT INTO [FolderFile]
  VALUES ('user1_folder', 'shared_file1');
INSERT INTO [FolderFile]
  VALUES ('user1_folder', 'shared_file2');

-- User 2 puts some files in his private folders
INSERT INTO [FolderFile]
  VALUES ('user2_folder', 'user2_file');
INSERT INTO [FolderFile]
  VALUES ('user2_folder', 'shared_file1');
-- UPDATE: new case
INSERT INTO [FolderFile]
  VALUES ('user2_folder', 'shared_file4');

期望结果

我希望查看给定@UserId(本例中为user_1)的所有私有和共享文件,以及该用户的相关私人文件夹(如果有的话)。 请注意,文件夹对于用户的文件是可选的

尝试查询#1

DECLARE @UserId VARCHAR(50) = 'user_1'

SELECT
  F.[Id] AS [FileId],
  F.[UserId] AS [FileUserId],
  FO.[Id] AS [FolderId]
FROM
  [File] AS F
LEFT JOIN
  [FolderFile] FOF ON FOF.[FileId] = F.[Id]
LEFT JOIN
  [Folder] FO ON FO.[Id] = FOF.[FolderId]
WHERE
  F.[UserId] IS NULL
  OR F.[UserId] = @UserId

结果#1

FileId          FileUserId    FolderId
=========================================
user1_file1     user_1        NULL
user1_file2     user_1        NULL
shared_file1    NULL          user1_folder
shared_file1    NULL          user2_folder   <== bad result
shared_file2    NULL          user1_folder
shared_file3    NULL          NULL
shared_file4    NULL          user2_folder   <== bad result

尝试查询#2

Folder JOIN ON添加其他条件。

DECLARE @UserId VARCHAR(50) = 'user_1'

SELECT
  F.[Id] AS [FileId],
  F.[UserId] AS [FileUserId],
  FO.[Id] AS [FolderId]
FROM
  [File] AS F
LEFT JOIN
  [FolderFile] FOF ON FOF.[FileId] = F.[Id]
LEFT JOIN
  [Folder] FO ON FO.[Id] = FOF.[FolderId] AND FO.[UserId] = @UserId -- Add another condition here on UserId
WHERE
  F.[UserId] IS NULL
  OR F.[UserId] = @UserId

结果#2

FileId          FileUserId    FolderId
=========================================
user1_file1     user_1        NULL
user1_file2     user_1        NULL
shared_file1    NULL          user1_folder
shared_file1    NULL          NULL           <== bad result
shared_file2    NULL          user1_folder
shared_file3    NULL          NULL
shared_file4    NULL          NULL

分析

如上所示,user_2文件夹的关联会导致user_1返回额外的行。我不想要包含这一行。

如果FolderFile表上有UserId,我认为我可以用条件限制它,但它不会。通过关联的UserId隐含Folder。关联上的LEFT JOIN导致它传播null并传递它下面的条件。

我的想法已经不多了,虽然它可能很明显:)

更新#1

我添加了一个shared_file4的新案例,该案例位于user_2的文件夹中,但不是user_1。它应该包含在两个用户的结果中。

INSERT INTO [File]
  VALUES ('shared_file4', NULL);

INSERT INTO [FolderFile]
  VALUES ('user2_folder', 'shared_file4');

9 个答案:

答案 0 :(得分:1)

关于LEFT JOIN已经有了一些很好的答案。我决定和CTE一起玩,看看我是否可以做出非常有表现力的答案。享受:

DECLARE @UserId VARCHAR(50) = 'user_1'

;WITH
PrivateFile (FileId) AS
(
  SELECT Id FROM [File]
  WHERE UserId = @UserId
),
SharedFile (FileId) AS
(
  SELECT Id FROM [File]
  WHERE UserId is null
),
AnyFile ([FileId]) AS
(
  SELECT FileId FROM PrivateFile
  UNION
  SELECT FileId FROM SharedFile
),
PrivateFolder (FolderId) AS
(
  SELECT Id FROM [Folder]
  WHERE UserId = @UserId
),
AssociatedFolder ([FileId], [FolderId]) AS
(
  SELECT ff.FileId, ff.FolderId
  FROM [FolderFile] ff
    JOIN PrivateFolder pf ON ff.FolderId = pf.FolderId
)
SELECT f.[FileId], @UserId as UserId, fo.[FolderId]
FROM AnyFile as f
  LEFT JOIN AssociatedFolder as fo ON f.[FileId] = fo.[FileId]

答案 1 :(得分:1)

这为给定数据提供了所需的答案。

-- Sample data.
declare @Users as Table ( UserId VarChar(50) not NULL );
insert into @Users ( UserId ) values
  ( 'user_1' ), ( 'user_2' );

declare @Folders as Table ( FolderId VarChar(50) not NULL, UserId VarChar(50) not NULL );
insert into @Folders ( FolderId, UserId ) values
  ( 'user1_folder', 'user_1' ), ( 'user2_folder', 'user_2' );

declare @Files as Table ( FileId VarChar(50) not NULL, UserId VarChar(50) NULL );
insert into @Files ( FileId, UserId ) values
  -- Private files.
  ( 'user1_file1', 'user_1' ), ( 'user1_file2', 'user_1' ),
  ( 'user2_file1', 'user_2' ), ( 'user2_file2', 'user_2' ),
  -- Shared files.
  ( 'shared_file1', NULL ), ( 'shared_file2', NULL ), ( 'shared_file3', NULL ), ( 'shared_file4', NULL );

declare @FileFolders as Table ( FolderId VarChar(50) not NULL, FileId VarChar(50) not NULL );
insert into @FileFolders ( FolderId, FileId ) values
  -- User 1 puts some files in his private folders.
  ( 'user1_folder', 'user1_file' ), ( 'user1_folder', 'shared_file1' ), ( 'user1_folder', 'shared_file2' ),
  -- User 2 puts some files in his private folders.
  ( 'user2_folder', 'user2_file' ), ( 'user2_folder', 'shared_file1' ), ( 'user2_folder', 'shared_file4' );

select * from @Users;
select * from @Files;
select * from @Folders;
select * from @FileFolders;

-- Query the data.
declare @UserId as VarChar(50) = 'user_1';

with
  -- Any file with a   UserId   of   NULL is shared.
  --   If it is in the given user's folders then pick up the folder.
  SharedFiles as (
    select Fi.FileId, Max( Fi.UserId ) as UserId, Max( Fo.FolderId ) as FolderId
      from @Files as Fi left outer join
        @FileFolders as FF on FF.FileId = Fi.FileId left outer join
        @Folders as Fo on Fo.FolderId = FF.FolderId and ( Fo.UserId = @UserId or FF.FileId is NULL )
      where Fi.UserId is NULL
      group by Fi.FileId ),
  -- Any file with a non-NULL   UserId   is private.
  --   Find all of the given user's files.
  PrivateFiles as (
    select Fi.FileId, Fi.UserId, Fo.FolderId
      from @Files as Fi left outer join
        @FileFolders as FF on FF.FileId = Fi.FileId left outer join
        @Folders as Fo on Fo.FolderId = FF.FolderId and Fo.UserId = @UserId
      where Fi.UserId = @UserId )
   select FileId, UserId, FolderId
     from PrivateFiles
   union all
   select FileId, UserId, FolderId
     from SharedFiles;

答案 2 :(得分:1)

我使用Row_number

更改了#query2
;WITH temp AS 
 (
    SELECT
    F.[Id] AS [FileId],
    F.[UserId] AS [FileUserId],
    FO.[Id] AS [FolderId],
    row_number() OVER(PARTITION BY F.Id ORDER BY FO.Id DESC) AS Rn 
   -- if folder id not null (it means that folder belongs to @UserId) 
   --> it will be the first priority -- Rownumber = 1
   FROM
    [File] AS F
   LEFT JOIN
    [FolderFile] FOF ON FOF.[FileId] = F.[Id]
   LEFT JOIN
    [Folder] FO ON FO.[Id] = FOF.[FolderId] AND FO.[UserId] = @UserId 
   WHERE
    F.[UserId] IS NULL
    OR F.[UserId] = @UserId
 )
 SELECT t.FileId, t.FileUserId, t.FolderId FROM temp t
 WHERE rn = 1

答案 3 :(得分:1)

使用outer apply()

declare @UserId varchar(50) = 'user_1';
select 
    FileId = F.Id
  , FileUserId = F.UserId
  , FolderId = x.Id
from [File] as F 
  outer apply (
    select top 1 
      Id = case when fo.UserId = @UserId then fo.Id else null end
    from [FolderFile] fof 
      left join [Folder] fo
        on fo.Id = fof.FolderId
    where fof.FileId = f.id
    order by case when fo.UserId = @UserId then 0 else 1 end
    ) as x
where (f.UserId = @UserId or f.UserId is null);

rextester演示:http://rextester.com/YEAMZ12650

返回:

+--------------+------------+--------------+
|    FileId    | FileUserId |   FolderId   |
+--------------+------------+--------------+
| user1_file1  | user_1     | NULL         |
| user1_file2  | user_1     | NULL         |
| shared_file1 | NULL       | user1_folder |
| shared_file2 | NULL       | user1_folder |
| shared_file3 | NULL       | NULL         |
| shared_file4 | NULL       | NULL         |
+--------------+------------+--------------+

答案 4 :(得分:0)

[FolderFile]表显示&#34; shared_file1&#34;存在于user1_folder和user2_folder中。这是对的吗?

(抱歉,我没有足够的意义来添加评论。)

答案 5 :(得分:0)

请尝试以下方法......

DECLARE @UserId VARCHAR( 50 ) = 'user_1'

SELECT File.Id AS FileId,
       File.UserId AS FileUserId,
       Folder.Id AS FolderId
FROM File
LEFT JOIN FolderFile ON FolderFile.FileId = File.Id
LEFT JOIN Folder ON Folder.Id = FolderFile.FolderId
                AND Folder.UserId = @UserId
WHERE ( File.UserId IS NULL OR
        File.UserId = @UserId )
  AND ( FileUserId IS NOT NULL OR
        FolderId IS NOT NULL )

您的第二次尝试查询已关闭,您只需添加一个排除子句,其中两个字段均为NULL /包含子句,其中至少有一个字段为NULL

如果您有任何问题或意见,请随时发表评论。

进一步阅读

https://www.w3schools.com/sql/sql_null_values.asp

答案 6 :(得分:0)

额外的行由第一个LEFT JOIN引入FolderFile,而不是LEFT JOIN引入Folder,因此在{{1}上添加了额外的连接条件表格不会消除该行。

但是,您可以在Folder子句中过滤掉行。由于您需要没有文件夹的共享文件的行,例如WHERE或链接到属于shared_file3的文件夹的共享文件的行,只需将以下过滤器添加到查询1中。

@UserId

<强>更新

如果您只想包含属于该用户但仍包含所有共享文件的私人文件夹,则以下操作应该可以解决问题。

DECLARE @UserId VARCHAR(50) = 'user_1'

SELECT
  F.[Id] AS [FileId],
  F.[UserId] AS [FileUserId],
  FO.[Id] AS [FolderId]
FROM
  [File] AS F
LEFT JOIN
  [FolderFile] FOF ON FOF.[FileId] = F.[Id]
LEFT JOIN
  [Folder] FO ON FO.[Id] = FOF.[FolderId]
WHERE (F.[UserId] IS NULL OR F.[UserId] = @UserId)
  AND (FO.UserId IS NULL OR FO.UserId = @UserId)

答案 7 :(得分:0)

你可以试试这个:

DECLARE @UserId VARCHAR(50) = 'user_1'

SELECT
    F.[Id] AS [FileId],
    F.[UserId] AS [FileUserId],
    FO.[Id] AS [FolderId]
FROM
    [File] AS F
LEFT JOIN
    [FolderFile] FOF ON FOF.[FileId] = F.[Id]
LEFT JOIN
    [Folder] FO ON FO.[Id] = FOF.[FolderId]
WHERE
    FO.[UserId] = @UserId
    OR F.[UserId] = @UserId;

已编辑:对于shared_file_3,其中包含NULL user_id,而不在任何文件夹中,如果您的设计出现在所有用户shared files中,那么您应该使用:

DECLARE @UserId VARCHAR(50) = 'user_1'

SELECT
    F.[Id] AS [FileId],
    F.[UserId] AS [FileUserId],
    FO.[Id] AS [FolderId]
FROM
    [File] AS F
LEFT JOIN
    [FolderFile] FOF ON FOF.[FileId] = F.[Id]
LEFT JOIN
    [Folder] FO ON FO.[Id] = FOF.[FolderId]
WHERE
    FO.[UserId] = @UserId
    OR F.[UserId] = @UserId
    OR (FO.[UserId] IS NULL AND F.[UserId] IS NULL);

答案 8 :(得分:0)

尽管给出的许多答案都能够返回我需要的结果集,但它们并没有提供非常好的查询计划。我最终决定以最佳性能实现我想要的结果集的最佳方法是对FolderFile表进行非规范化并添加UserId列。现在可以使用此列,我可以使用类似于原始查询尝试的标准联接,在FolderFile LEFT JOIN过滤用户。

CREATE TABLE [FolderFile] (
    [FolderId] VARCHAR(50) NOT NULL,
    [FileId] VARCHAR(50) NOT NULL,
    [UserId] VARCHAR(50) NOT NULL
);
SELECT
  F.[Id] AS [FileId],
  F.[UserId] AS [FileUserId],
  FO.[Id] AS [FolderId]
FROM
  [File] AS F
LEFT JOIN
  [FolderFile] FOF ON FOF.[FileId] = F.[Id] AND FOF.[UserId] = @UserId
LEFT JOIN
  [Folder] FO ON FO.[Id] = FOF.[FolderId]
WHERE
  F.[UserId] IS NULL
  OR F.[UserId] = @UserId