我有一张这样的表:
a | user_id
----------+-------------
0.1133 | 2312882332
4.3293 | 7876123213
3.1133 | 2312332332
1.3293 | 7876543213
0.0033 | 2312222332
5.3293 | 5344343213
3.2133 | 4122331112
2.3293 | 9999942333
我想找到某一特定行(例如 1.3293 | 7876543213),并选出与它最接近的 4 行:如果可能,取它前面 2 行和后面 2 行。
排序顺序为 ORDER BY a ASC。
在这种情况下,我会得到:
0.0033 | 2312222332
0.1133 | 2312882332
2.3293 | 9999942333
3.1133 | 2312332332
如何使用PostgreSQL实现这一目标? (顺便说一下,我正在使用PHP。)
P.S.:如果目标行是第一行或最后一行,那么最近的 4 行就是它后面的 4 行或前面的 4 行。
答案 0 :(得分:4)
-- Temporary table holding the sample data from the question.
CREATE TEMP TABLE tbl (
    a       float,
    user_id bigint
);

-- Load the eight sample rows in the order the question lists them.
INSERT INTO tbl (a, user_id)
VALUES
    (0.1133, 2312882332),
    (4.3293, 7876123213),
    (3.1133, 2312332332),
    (1.3293, 7876543213),
    (0.0033, 2312222332),
    (5.3293, 5344343213),
    (3.2133, 4122331112),
    (2.3293, 9999942333);
-- Number all rows in (a, user_id) order, locate the search row, then keep the
-- first four rows above a computed lower bound, skipping the search row itself.
WITH numbered AS (
    SELECT
        t.a,
        t.user_id,
        row_number() OVER (ORDER BY t.a, t.user_id) AS rn
    FROM tbl AS t
),
target AS (
    SELECT
        n.rn,
        -- Exclusive lower bound of the window: two rows before the search row,
        -- but never later than the start of the table's last five rows, so
        -- that four rows remain when the search row is near the end.
        LEAST(n.rn - 3, (SELECT max(m.rn) - 5 FROM numbered AS m)) AS min_rn
    FROM numbered AS n
    WHERE n.a = 1.3293
      AND n.user_id = 7876543213
)
SELECT
    x.a,
    x.user_id,
    x.rn,
    y.rn,
    y.min_rn
FROM numbered AS x
CROSS JOIN target AS y
WHERE x.rn > y.min_rn
  AND x.rn <> y.rn          -- leave out the search row itself
ORDER BY x.a, x.user_id
LIMIT 4;
返回问题中描述的结果,前提是 (a, user_id) 唯一。
问题中没有说明 a 是否唯一,因此我额外按 user_id 排序来打破并列。这也是我使用窗口函数 row_number() 而不是 rank() 的原因。无论如何,row_number() 都是合适的工具:我们想要恰好 4 行,而如果排序中存在并列行,rank() 返回的行数是不确定的。
只要表中至少有 5 行,该查询总是返回 4 行。目标行靠近开头/结尾时,返回最前/最后的几行;其他情况下返回目标行之前和之后各两行。目标行本身不包含在结果中。
下面是在 @Tim Landscheidt 所发布答案基础上的改进版本。如果你喜欢利用索引的思路,请给他的答案投票。对于小表不必费心;但如果有合适的索引,它会提升大表上的性能。最佳选择是 (a, user_id) 上的多列索引(multicolumn index)。
-- Nearest four neighbours of one row in (a, user_id) order, written so that
-- each side only has to read a few rows next to the search row.
-- The search row itself is never returned.
WITH params (_a, _user_id) AS (
    -- Enter the search row once; typed to match tbl's columns.
    SELECT
        CAST(5.3293 AS double precision),
        CAST(5344343213 AS bigint)
),
x AS (
    (
        -- Up to four rows before the search row; rn = 1 is the closest.
        -- The row-wise comparison is equivalent to
        --   a < _a OR (a = _a AND user_id < _user_id)
        -- and can be served by a multicolumn index on (a, user_id).
        -- The strict comparison already leaves out the search row.
        SELECT
            t.a,
            t.user_id,
            row_number() OVER (ORDER BY t.a DESC, t.user_id DESC) AS rn
        FROM tbl AS t
        CROSS JOIN params AS p
        WHERE (t.a, t.user_id) < (p._a, p._user_id)
        ORDER BY t.a DESC, t.user_id DESC
        LIMIT 4     -- at most four rows are ever needed from one side
    )
    UNION ALL       -- the two sides are disjoint, no de-duplication needed
    (
        -- Up to four rows after the search row; rn = 1 is the closest.
        SELECT
            t.a,
            t.user_id,
            row_number() OVER (ORDER BY t.a ASC, t.user_id ASC) AS rn
        FROM tbl AS t
        CROSS JOIN params AS p
        WHERE (t.a, t.user_id) > (p._a, p._user_id)
        ORDER BY t.a ASC, t.user_id ASC
        LIMIT 4
    )
),
y AS (
    -- Closest rows first (rn counts outwards from the search row on both
    -- sides), so the first four are the nearest neighbours.
    SELECT
        x.a,
        x.user_id
    FROM x
    ORDER BY x.rn
    LIMIT 4
)
SELECT
    y.a,
    y.user_id
FROM y
ORDER BY y.a, y.user_id;    -- present the result in the requested order
按照问题的要求,搜索条件由 (a, user_id) 构成,而不只是 a。这会以微妙的方式改变窗口定义、ORDER BY 和 WHERE 子句。
直接做 UNION,不需要额外的查询层级。两个参与 UNION 的查询都需要加上括号,这样它们才能各自带有自己的 ORDER BY。
按要求对结果排序。这需要再加一层查询(开销几乎可以忽略)。
由于参数在多处使用,我把输入集中放在最前面的一个 CTE 中。如需重复使用,可以把这个查询几乎“原样”封装成 SQL 或 plpgsql 函数。
答案 1 :(得分:2)
还有一个:
-- Nearest four neighbours of the row with a = 1.3293, returned in ascending
-- order of a. Rows whose a equals the search value are excluded by the
-- strict comparisons below.
WITH prec_rows AS (
    -- Up to four rows below the search value; rn = 1 is the closest.
    -- user_id breaks ties on a so numbering and LIMIT are deterministic.
    SELECT
        a,
        user_id,
        ROW_NUMBER() OVER (ORDER BY a DESC, user_id DESC) AS rn
    FROM tbl
    WHERE a < 1.3293
    ORDER BY a DESC, user_id DESC
    LIMIT 4
),
succ_rows AS (
    -- Up to four rows above the search value; rn = 1 is the closest.
    SELECT
        a,
        user_id,
        ROW_NUMBER() OVER (ORDER BY a ASC, user_id ASC) AS rn
    FROM tbl
    WHERE a > 1.3293
    ORDER BY a ASC, user_id ASC
    LIMIT 4
),
nearest AS (
    -- Interleave both sides by closeness and keep the four closest rows.
    SELECT
        s.a,
        s.user_id
    FROM (
        SELECT a, user_id, rn FROM prec_rows
        UNION ALL
        SELECT a, user_id, rn FROM succ_rows
    ) AS s
    ORDER BY s.rn, s.a
    LIMIT 4
)
-- Present the chosen rows in ascending order of a.
SELECT
    a,
    user_id
FROM nearest
ORDER BY a, user_id;
据我所知(AFAIR),WITH 会物化出一张内存中的临时表,因此这个方案的重点是尽可能限制其大小(本例中为八行)。
答案 2 :(得分:0)
-- Work in the scratch schema.
-- NOTE: assumes a schema named tmp already exists.
SET search_path = 'tmp';

-- IF EXISTS keeps the script re-runnable: a plain DROP TABLE raises an error
-- on the first run, when the table has not been created yet.
DROP TABLE IF EXISTS lutser;

CREATE TABLE lutser (
    val float,
    num bigint
);

-- Same eight sample rows as in the question.
INSERT INTO lutser (val, num)
VALUES
    (0.1133, 2312882332),
    (4.3293, 7876123213),
    (3.1133, 2312332332),
    (1.3293, 7876543213),
    (0.0033, 2312222332),
    (5.3293, 5344343213),
    (3.2133, 4122331112),
    (2.3293, 9999942333);
-- First attempt: rank every row by val, then pair the search row ("this")
-- with every row ("that") whose rank lies within two positions of it.
-- The search row itself is part of the result (relrnk = 0).
WITH ranked_lutsers AS (
    SELECT
        l.val,
        l.num,
        rank() OVER (ORDER BY l.val) AS rnk
    FROM lutser AS l
)
SELECT
    that.val,
    that.num,
    that.rnk - this.rnk AS relrnk   -- signed distance from the search row
FROM ranked_lutsers AS this
INNER JOIN ranked_lutsers AS that
    ON that.rnk >= this.rnk - 2
   AND that.rnk <= this.rnk + 2
WHERE this.val = 1.3293;
结果:
DROP TABLE
CREATE TABLE
INSERT 0 8
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
1.3293 | 7876543213 | 0
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(5 rows)
正如Erwin指出的那样,输出中不需要中心行。此外,应使用row_number()而不是rank()。
-- Two rows before and two rows after the search row, search row excluded.
WITH ranked_lutsers AS (
    SELECT
        val,
        num,
        -- row_number() rather than rank(): every row gets a distinct
        -- position even when val has ties (num breaks them).
        row_number() OVER (ORDER BY val, num) AS rnk
    FROM lutser
)
SELECT
    that.val,
    that.num,
    (that.rnk - this.rnk) AS relrnk     -- signed distance from the search row
FROM ranked_lutsers AS that
INNER JOIN ranked_lutsers AS this
    ON that.rnk BETWEEN this.rnk - 2 AND this.rnk + 2
WHERE this.val = 1.3293
  AND that.rnk <> this.rnk              -- drop the search row itself
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY relrnk;
结果2:
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(4 rows)
UPDATE2:即使目标行位于列表的开头或结尾,也始终选出四行。这让查询变得有点丑陋。(但还没有 Erwin 的那么丑 ;-)
-- Always return the four rows closest to the search row, also when the
-- search row sits at the top or bottom of the list.
WITH ranked_lutsers AS (
    SELECT
        val,
        num,
        -- row_number() rather than rank(): every row gets a distinct
        -- position even when val has ties (num breaks them).
        row_number() OVER (ORDER BY val, num) AS rnk
    FROM lutser
)
SELECT
    that.val,
    that.num,
    ABS(that.rnk - this.rnk) AS srtrnk,     -- unsigned distance, used for sorting
    (that.rnk - this.rnk)    AS relrnk      -- signed distance from the search row
FROM ranked_lutsers AS that
INNER JOIN ranked_lutsers AS this
    -- Look up to four positions either way so that four candidates exist
    -- even when all of them lie on one side of the search row.
    ON that.rnk BETWEEN this.rnk - 4 AND this.rnk + 4
-- Search row for this demonstration is the second row of the list
-- (val = 0.1133); use 1.3293 for the example row from the question.
WHERE this.val = 0.1133
  AND that.rnk <> this.rnk                  -- drop the search row itself
-- relrnk breaks the tie between the rows at equal distance on either side,
-- which makes the output order deterministic.
ORDER BY srtrnk ASC, relrnk ASC
LIMIT 4;
输出:
val | num | srtrnk | relrnk
--------+------------+--------+--------
0.0033 | 2312222332 | 1 | -1
1.3293 | 7876543213 | 1 | 1
2.3293 | 9999942333 | 2 | 2
3.1133 | 2312332332 | 3 | 3
(4 rows)
更新:使用嵌套 CTE 的版本(带有外连接!!!)。为方便起见,我给表添加了一个主键;无论如何,这本来就是个好主意。
-- Rank all rows, compute the rank distance between every pair of distinct
-- rows, then return the four rows closest to the row with id = 1.
-- NOTE: requires an id column on lutser.
WITH distance AS (
    WITH ranked_lutsers AS (
        SELECT
            l.id,
            row_number() OVER (ORDER BY l.val, l.num) AS rnk
        FROM lutser AS l
    )
    -- Warning: pairs every ranked row with every other ranked row.
    SELECT
        src.id                  AS one,
        dst.id                  AS two,
        ABS(dst.rnk - src.rnk)  AS dist
    FROM ranked_lutsers AS src
    INNER JOIN ranked_lutsers AS dst
        ON src.id <> dst.id
)
SELECT lu.*
FROM distance AS di
INNER JOIN lutser AS lu
    ON lu.id = di.two
WHERE di.one = 1
ORDER BY di.dist
LIMIT 4;