我在 Stack Overflow 和其他网站上找到了一些计算中位数的解决方案,但它们都基于同一个定义——中位数是这样一个值:一半的行比它小,另一半的行比它大。然而,当行数为偶数时,中位数应该是中间两个值的平均值。如何在 MySQL 中计算?
答案 0 :(得分:1)
给出以下模式
-- Sample table used by the median queries:
--   i : auto-incremented surrogate primary key
--   n : the value whose median is computed (indexed)
CREATE TABLE numbers (
    i INT AUTO_INCREMENT,
    n INT,
    PRIMARY KEY (i),
    INDEX (n)
);
我们想找到 `n` 列的中位数。
-- Method 1: CTE + window function (needs window-function support, i.e. MySQL 8.0+).
-- Number the non-NULL values in ascending order, then average the middle row
-- (odd count) or the two middle rows (even count).
with sorted as (
    select
        t.n,
        row_number() over (order by t.n) as rn
    from numbers t
    -- NULLs would sort first and shift the middle position, while avg() ignores them.
    where t.n is not null
), cnt as (
    -- count(n) counts only non-NULL values, matching the rows numbered above.
    select count(n) as c
    from numbers
)
select avg(s.n) as median
from sorted s
cross join cnt
-- (c+1)/2 is fractional for even c, so floor/ceil select both middle rows;
-- for odd c both bounds are the same single row.
where s.rn between floor((cnt.c + 1) / 2) and ceil((cnt.c + 1) / 2);
性能:可以(对于10万行140ms)
-- Method 2: temporary table with an auto-increment row number.
-- The non-NULL values are inserted in ascending order, so rn acts as the
-- 1-based position of each value in the sorted sequence.
drop temporary table if exists tmp;

create temporary table tmp (
    rn int auto_increment primary key,
    n int
) engine = memory;

insert into tmp (n)
select n
from numbers
-- NULLs would take the lowest rn values and shift the middle position.
where n is not null
order by n;

-- Average the middle row (odd count) or the two middle rows (even count).
-- The count is read from numbers, not tmp: MySQL documents that a temporary
-- table cannot be referenced more than once in the same query.
select avg(n) as median
from tmp
cross join (
    select count(n) as c  -- non-NULL values only, matching the rows in tmp
    from numbers
) cnt
where rn between floor((c + 1) / 2) and ceil((c + 1) / 2);
性能:可以(对于10万行,为110ms)
-- Method 3: LIMIT/OFFSET in a prepared statement.
-- Skip straight to the middle of the sorted values and average 1 row (odd
-- count) or 2 rows (even count).

-- Number of non-NULL values; NULLs are excluded from the median.
set @c = (select count(n) from numbers);
-- 1 row when the count is odd, 2 rows when it is even.
set @limit = 2 - (@c % 2);
-- 0-based position of the (first) middle row; clamped so an empty table
-- yields offset 0 instead of an invalid negative offset.
set @offset = greatest((@c + 1) div 2 - 1, 0);

-- LIMIT/OFFSET only pick the middle rows if the rows are sorted, hence the
-- explicit ORDER BY inside the subquery.
prepare stmt from '
select avg(n) as median
from (
select n
from numbers
where n is not null
order by n
limit ? offset ?
) sub
';
execute stmt using @limit, @offset;
deallocate prepare stmt;
性能:最佳(10万行50ms)
-- Method 4a: self cross join, one group per row (O(n^2)).
-- A row is a median candidate when neither the number of smaller values nor
-- the number of larger values exceeds half of the value count.
-- NOTE: rows are judged individually, so duplicate values can be weighted
-- incorrectly: for (1, 1, 2, 3) this averages 1, 1, 2 and gives 1.3333
-- instead of 1.5. Use it only when n has no duplicates.
select avg(n) as median
from (
    select t.n
    from numbers t
    cross join numbers t2
    group by t.i
    -- Comparisons evaluate to 1/0, so each sum() is a count of matching rows.
    having greatest(sum(t2.n < t.n), sum(t2.n > t.n))
        <= (select count(n) from numbers) / 2  -- count(n): ignore NULL values
) sub;
-- Method 4b: self cross join, one group per distinct value (O(n^2)).
-- Same candidate test as the per-row variant, but grouped by value so that
-- each distinct value enters the final average at most once.
select avg(n) as median
from (
    select t.n
    from numbers t
    cross join numbers t2
    group by t.n
    -- A value occurring k times contributes k rows of t to its group, so both
    -- sums are inflated by a factor of k. sum(t2.n = t.n) equals k * k, so
    -- dividing by its square root brings the counts back to per-value numbers.
    having greatest(sum(t2.n < t.n), sum(t2.n > t.n)) / sqrt(sum(t2.n = t.n))
        <= (select count(n) from numbers) / 2  -- count(n): ignore NULL values
) sub;
性能:最差——O(n²)(1 千行约 500 ms)