获取多列的最常见值(众数)

时间:2016-05-20 12:01:13

标签: sql-server-2008 count sql-order-by partition

我正在清理一批记录质量较差的数据:同一个人的社会人口统计信息在不同记录之间不一致。我想为每个人取各字段中最常见的值(众数)。

一种方法是按id进行分区,然后计算每个值出现的次数,保留每个id的最高计数:

 -- Build dbo.[table] with one row per id: the most frequent ethnic_group
 -- for that id (its mode) and how many times it occurred.
 --
 -- `table` is a reserved keyword, so the name must be bracket-quoted.
 -- SQL Server 2008 has no DROP TABLE IF EXISTS; guard the drop so the
 -- script also works on the first run, when the table does not exist yet.
 IF OBJECT_ID(N'dbo.[table]', N'U') IS NOT NULL
     DROP TABLE dbo.[table];

 SELECT
     ranked.[id],
     ranked.[ethnic_group] AS [ethnic_mode],
     ranked.ct
 INTO dbo.[table]
 FROM (
     SELECT
         [id],
         [ethnic_group],
         -- COUNT(column) ignores NULLs, so a NULL group always has ct = 0
         -- and can only win when an id has no recorded value at all.
         COUNT([ethnic_group]) AS ct,
         ROW_NUMBER() OVER (
             PARTITION BY [id]
             -- Second sort key makes ties deterministic: without it the
             -- winner among equally frequent values is arbitrary.
             ORDER BY COUNT([ethnic_group]) DESC, [ethnic_group]
         ) AS rn
     FROM dbo.mytable
     GROUP BY [id], [ethnic_group]
 ) AS ranked
 -- rn = 1 already yields exactly one row per id, so no DISTINCT is needed.
 -- No ORDER BY: rows stored by SELECT ... INTO have no guaranteed order;
 -- sort when reading from the table instead.
 WHERE ranked.rn = 1;

但是我想为几个变量(族群,收入群体和其他几个变量)做这个。

如何在一个语句中为多个变量分别求出众数,并插入到同一个表中(而不是为每个变量创建单独的表)?

下表说明了我想要做的一个例子:

 -- Sample data: several observations per person, so id repeats and must
 -- NOT be a primary key (the duplicate ids below would violate it).
 -- ethnic_mode / income_mode appear to hold the desired per-id result
 -- (the most frequent ethnic_group / income for that id).
 --
 -- SQL Server 2008 has no DROP TABLE IF EXISTS; guard the drop so the
 -- script also works when the table does not exist yet.
 IF OBJECT_ID(N'mytable', N'U') IS NOT NULL
     DROP TABLE mytable;

 CREATE TABLE mytable(
     id     VARCHAR(2) NOT NULL
    ,ethnic_group VARCHAR(12) NOT NULL
    ,ethnic_mode VARCHAR(11) NOT NULL
    ,income VARCHAR(6) NOT NULL
    ,income_mode VARCHAR(11) NOT NULL
 );

 -- Data rows only: the column-header line is not inserted as a record.
 INSERT INTO mytable(id,ethnic_group,ethnic_mode,income,income_mode)
 VALUES
     ('1','white','white','middle','middle')
    ,('1','white','white','middle','middle')
    ,('1','mixed','white','high','middle')
    ,('2','asian','asian','middle','middle')
    ,('2','mixed','asian','middle','middle')
    ,('2','asian','asian','middle','middle');

1 个答案:

答案 0 :(得分:0)

我会使用子查询在1个插入语句中完成此操作。

以下是基于您示例中的表结构编写的例子:

/*
 * Collapse a table with repeated ids into one row per id, taking the most
 * frequent recorded (non-NULL) value of each attribute: its mode.
 * Everything runs in one batch because table variables are batch-scoped.
 */

/* Source: raw observations; the same id appears on several rows. */
DECLARE @source_table TABLE(
    id     VARCHAR(2) NOT NULL
   ,ethnic_group VARCHAR(12) NULL
   ,ethnic_mode VARCHAR(11) NULL
   ,income VARCHAR(6) NULL
   ,income_mode VARCHAR(11) NULL
 );

/* Destination: the primary key guarantees at most one row per id. */
DECLARE @destination_table TABLE(
    id     VARCHAR(2) NOT NULL PRIMARY KEY
   ,ethnic_group VARCHAR(12) NULL
   ,income VARCHAR(6) NULL
 );

/* Sample data. For id '3' NULLs outnumber the recorded values, which
   exercises the NULL handling of the mode calculation below. */
INSERT INTO @source_table (id, ethnic_group, ethnic_mode, income, income_mode)
VALUES
    ('1', 'white', 'white', 'middle', 'middle')
  , ('1', 'white', 'white', 'middle', 'middle')
  , ('1', 'mixed', 'white', 'high',   'middle')
  , ('2', 'asian', 'asian', 'middle', 'middle')
  , ('2', 'mixed', 'asian', 'middle', 'middle')
  , ('2', 'asian', 'asian', 'middle', 'middle')
  , ('3', 'asian', NULL,    NULL,     NULL)
  , ('3', NULL,    NULL,    'middle', NULL)
  , ('3', NULL,    NULL,    NULL,     NULL);

/*
 * One row per id; each attribute's mode comes from its own correlated
 * subquery, so further attributes can be added the same way.
 *
 * COUNT(<column>) counts only non-NULL values, so the NULL group always
 * scores 0 and a missing value can never outvote a recorded one. NULL is
 * returned only when an id has no recorded value for that attribute.
 * (Counting rows instead would make id '3' resolve to NULL for both
 * attributes.)
 *
 * The second ORDER BY key breaks ties between equally frequent values
 * alphabetically, so TOP (1) is deterministic.
 */
INSERT INTO @destination_table
        (
          id
        , ethnic_group
        , income
        )
SELECT st.id
    , (
        SELECT TOP (1) sub_st.ethnic_group
        FROM @source_table AS sub_st
        WHERE sub_st.id = st.id
        GROUP BY sub_st.ethnic_group
        ORDER BY COUNT(sub_st.ethnic_group) DESC, sub_st.ethnic_group
    ) AS ethnic_group
    , (
        SELECT TOP (1) sub_st.income
        FROM @source_table AS sub_st
        WHERE sub_st.id = st.id
        GROUP BY sub_st.income
        ORDER BY COUNT(sub_st.income) DESC, sub_st.income
    ) AS income
FROM @source_table AS st
GROUP BY st.id;


/* Show the result: one row per id, in a stable order. */
SELECT  id
      , ethnic_group
      , income
FROM @destination_table
ORDER BY id;