我需要根据某一列的所有值,为每一行找出与其值最接近的两个值。
我尝试使用交叉连接,并用 lead 函数来计算距离,但完全不确定该怎么写。请提出建议。
-- Asker's attempt, kept exactly as posted (reported as not working).
-- It cross-joins cluster with itself via a comma join with no join condition, and
-- calls lead() with b.value as the second argument and without an OVER (...) clause.
-- NOTE(review): lead() is normally an analytic function that needs OVER (ORDER BY ...)
-- and takes a row offset as its second argument -- confirm against the target database.
select a.id,lead(a.value,b.value) as distance from cluster a , cluster b
输入表:
ID Values
1 12.1
2 11
3 14
4 10
5 9
6 15
7 16
8 8
期望输出:
ID Values Closest_Value
1 12.1 11,10
2 11 9,10
3 14 15,16
4 10 9,11
5 9 8,10
6 15 14,16
7 16 14,15
8 8 9,10
答案 0 :(得分:0)
一种方法使用交叉连接和聚合:
-- For every row, list the two values taken from OTHER rows that are closest to the
-- row's own value, as a comma-separated string ordered from nearest to farthest.
-- Approach: pair each row with every other row (c.id <> c2.id), rank the pairs per id
-- by absolute difference, keep the two best and aggregate them.
-- The self-join is quadratic in the number of rows, so this suits small tables only.
select id, value,
       listagg(other_value, ',') within group (order by diff, other_value) as near_values
from (select c.id, c.value, c2.value as other_value,
             -- distance is the absolute DIFFERENCE of the two values
             abs(c2.value - c.value) as diff,
             -- other value is the tie-breaker, so equally distant neighbours
             -- are picked (and listed) deterministically
             row_number() over (partition by c.id
                                order by abs(c2.value - c.value), c2.value) as seqnum
      from cluster c join
           cluster c2
           on c.id <> c2.id
     ) c
where seqnum <= 2
group by id, value;
对于大量数据,上述方法的效率并不高。一种替代方法是使用 lead() 和 lag() 获取按值排序后相邻的值,然后进行逆透视(unpivot)并聚合:
-- For every row, list the two values from other rows that are closest to its own value,
-- without a self-join. When rows are sorted by value, the two nearest neighbours of a
-- row are always among the two rows before it and the two rows after it, so:
--   1. lag()/lead() fetch those four candidates into columns,
--   2. a cross join with the numbers 1..4 unpivots them into one candidate per row,
--   3. the candidates are ranked by absolute difference and the best two aggregated.
-- NOTE(review): this query reads from "clusters" exactly as originally posted, while the
-- question refers to the table as "cluster" -- adjust to the actual table name.
with vals as (
      select c.id, c.value,
             (case when n.n = 1 then prev_value_2
                   when n.n = 2 then prev_value
                   when n.n = 3 then next_value
                   when n.n = 4 then next_value_2
              end) as other_value
      from (select c.*,
                   -- id is the tie-breaker so duplicate values get a stable ordering
                   lag(value, 2) over (order by value, id) as prev_value_2,
                   lag(value) over (order by value, id) as prev_value,
                   lead(value) over (order by value, id) as next_value,
                   lead(value, 2) over (order by value, id) as next_value_2
            from clusters c
           ) c cross join
           -- always exactly 4 numbers, independent of how many rows the data table has
           (select level as n
            from dual
            connect by level <= 4
           ) n
     )
select v.id, v.value,
       listagg(other_value, ',') within group (order by diff, other_value) as near_values
from (select v.*,
             abs(other_value - value) as diff,
             row_number() over (partition by id
                                order by abs(other_value - value), other_value) as seqnum
      from vals v
      -- rows at either end of the ordering have no neighbour on one side
      where other_value is not null
     ) v
where seqnum <= 2
group by id, value;