我正在尝试计算一列中重复项的总数(而不是单个重复项)。
-- Asker's attempted query, reproduced as posted; it is not runnable as written.
-- NOTE(review): the SELECT list is missing from this fragment.
from outputs
GROUP BY journal_id
HAVING ( COUNT(doi) > 1 )
-- NOTE(review): WHERE has to precede GROUP BY/HAVING; here it trails HAVING.
WHERE journal_id = 1
SQL表
doi journal_id
123 1
123 2
123 1
124 1
预期答案为2
答案 0 :(得分:2)
可以用总行数减去去重后的行数,来计算整行重复项的数量:
-- Whole-row duplicates: total number of rows minus the number of distinct rows.
select total_rows.cnt_all - unique_rows.cnt_individual
from (
    select count(*) as cnt_all
    from outputs
) total_rows
cross join (
    -- distinct * collapses rows that are identical in every column
    select count(*) as cnt_individual
    from (
        select distinct *
        from outputs
    ) deduped
) unique_rows;
如果您知道具体的列,并且您的数据库支持向 count(distinct) 传入多个参数,则可以将其大幅简化为:
-- Total rows minus the number of distinct (doi, journal_id) pairs.
-- Needs an engine that accepts several arguments to count(distinct ...).
select
    count(*) - count(distinct doi, journal_id)
from
    outputs;
或者,如果您的数据库不支持此功能:
-- Portable variant: each (doi, journal_id) group contributes its surplus
-- rows (cnt - 1), and the outer query adds those surpluses up.
-- sum() over zero groups is NULL, so coalesce() returns 0 for an empty table.
select coalesce(sum(cnt - 1), 0)
from (select doi, journal_id, count(*) as cnt
      from outputs
      group by doi, journal_id
     ) o;
答案 1 :(得分:0)
只需对指定期刊ID(journal_id)内各个重复项的计数求和即可。
-- Total number of rows in journal 1 whose doi occurs more than once there.
-- The inner query counts rows per doi and keeps only the repeated ones; the
-- outer query adds those counts up. Aggregates cannot be nested directly
-- (SUM(COUNT(...))) in most engines, hence the derived table.
SELECT
    COALESCE(SUM(dup.doi_count), 0) AS total_duplicates  -- 0 when nothing repeats
FROM (
    SELECT
        doi,
        COUNT(doi) AS doi_count
    FROM
        outputs
    WHERE
        journal_id = 1
    GROUP BY
        doi  -- group per doi: journal_id is already fixed by the WHERE clause
    HAVING
        COUNT(doi) > 1
) dup