我有两个查询正在进行类似的工作。 一个没有CTE,一个有CTE。我无法弄清楚为什么第二个查询在第一个查询完全没有结果的时候。
我花了最后两个小时尝试通过尝试各种连接来解决这个问题,并且在查询1中工作的相同连接在查询2中不起作用。我希望有人可以指导我。
第一个查询(返回结果):
WITH MessageCTE AS
(
SELECT dt
, id
, ts
, family
, message_type
, to_user
, message_id
, class
FROM dhruv.MessageLatencyInformation_20171210_20171125_to_20171130_02 as latencydata
INNER JOIN dhruv.UsersOn503AndAbove_20171201_200k as required_users
ON latencydata.to_user = required_users.user_id
)
SELECT COUNT(DISTINCT to_user) AS Users
, AVG(latency) AS AvgLatency
, AVG(CASE WHEN latency > 0 THEN latency ELSE NULL END) AS AvgLatency_Positive
, PERCENTILE(latency, 0.5) AS 50Percentile
, PERCENTILE(latency, 0.75) AS 75Percentile
, PERCENTILE(latency, 0.8) AS 80Percentile
, PERCENTILE(latency, 0.9) AS 90Percentile
, PERCENTILE(latency, 0.95) AS 95Percentile
, PERCENTILE(latency, 0.99) AS 99Percentile
FROM
(
SELECT a.dt, a.to_user, (latency_dl.ts - latency_pb.ts) as latency
FROM
(
SELECT dt
, id, ts
, family
, message_type
, to_user
, message_id
, class
FROM MessageCTE
WHERE class = 'pb'
) as latency_pb
INNER JOIN
(SELECT dt
, id
, ts
, family
, message_type
, to_user
, message_id
, class
FROM MessageCTE
WHERE class = 'rdl'
AND family = 'stm'
) as latency_rdl
ON latency_pb.dt = latency_rdl.dt and latency_pb.to_user = latency_rdl.to_user and latency_pb.id = latency_rdl.id
INNER JOIN
(
SELECT dt
, id
, ts
, family
, message_type
, to_user
, message_id
, class
FROM MessageCTE
WHERE class = 'dl'
) as latency_dl
ON latency_rdl.dt = latency_dl.dt and latency_rdl.to_user = latency_dl.to_user and latency_rdl.id = latency_dl.id) AS UserLatency;
现在的第二个查询,是一个轻微的修改和所有相同的条件,但由于某种原因,它返回没有匹配。希望有人可以指导我,我只花了大约2个小时尝试一些加入,我无法弄清楚为什么他们没有发生。
第二次查询:
WITH MessageCTE_pb AS
(
SELECT dt, id, ts, to_user
FROM
(
SELECT dt, id, min(ts) as ts, to_user
FROM dhruv.MessageLatencyInformation_20171210_20171125_to_20171130_02
WHERE class = 'pb'
GROUP BY dt, to_user, id
) as latencydata
INNER JOIN dhruv.UsersOn503AndAbove_20171201_200k as required_users
ON latencydata.to_user = required_users.user_id
)
, MessageCTE_dl AS
(
SELECT dt, id, ts, to_use
FROM
(
SELECT dt, id, max(ts) as ts, to_user
FROM dhruv.MessageLatencyInformation_20171210_20171125_to_20171130_02
WHERE class = 'dl'
GROUP BY dt, to_user, id
) as latencydata
INNER JOIN dhruv.UsersOn503AndAbove_20171201_200k as required_users
ON latencydata.to_user = required_users.user_id
)
, MessageCTE_rdl AS
(
SELECT dt, id, to_user
FROM
(
SELECT DISTINCT dt, id, to_user
FROM dhruv.MessageLatencyInformation_20171210_20171125_to_20171130_02
WHERE class = 'rdl'
AND family = 'stm'
) as latencydata
INNER JOIN dhruv.UsersOn503AndAbove_20171201_200k as required_users
ON latencydata.to_user = required_users.user_id
)
SELECT COUNT(DISTINCT to_user) AS Users
, AVG(latency) AS AvgLatency
, AVG(CASE WHEN latency > 0 THEN latency ELSE NULL END) AS AvgLatency_Positive
, PERCENTILE(latency, 0.5) AS 50Percentile
, PERCENTILE(latency, 0.75) AS 75Percentile
, PERCENTILE(latency, 0.8) AS 80Percentile
, PERCENTILE(latency, 0.9) AS 90Percentile
, PERCENTILE(latency, 0.95) AS 95Percentile
, PERCENTILE(latency, 0.99) AS 99Percentile
FROM
(
SELECT a.dt, a.to_user, (latency_dl.ts - latency_pb.ts) as latency
FROM MessageCTE_pb as latency_pb
INNER JOIN MessageCTE_rdl as latency_rdl
ON latency_pb.dt = latency_rdl.dt and latency_pb.to_user = latency_rdl.to_user and latency_pb.id = latency_rdl.id
INNER JOIN MessageCTE_dl as latency_dl
ON latency_rdl.dt = latency_dl.dt and latency_rdl.to_user = latency_dl.to_user and latency_rdl.id = latency_dl.id) AS UserLatency;
谢谢!
答案 0 :(得分:1)
答案块中的另一个评论,所以我可以发布一堆SQL ...
这是什么结果?
WITH
UserLatency AS
(
SELECT
latencydata.dt,
latencydata.to_user,
latencydata.id,
MAX(CASE WHEN latencydata.class = 'dl' THEN latencydata.ts END)
-
MIN(CASE WHEN latencydata.class = 'pb' THEN latencydata.ts END)
AS latency
FROM
dhruv.MessageLatencyInformation_20171210_20171125_to_20171130_02 AS latencydata
INNER JOIN
dhruv.UsersOn503AndAbove_20171201_200k AS required_users
ON latencydata.to_user = required_users.user_id
GROUP BY
latencydata.dt,
latencydata.to_user,
latencydata.id
HAVING
0 < SUM(CASE WHEN latencydata.class = 'rdl'
AND latencydata.family = 'stm' THEN 1 END)
)
SELECT
COUNT(DISTINCT to_user) AS Users
, AVG(latency) AS AvgLatency
, AVG(CASE WHEN latency > 0 THEN latency END) AS AvgLatency_Positive
, PERCENTILE(latency, 0.50) AS 50Percentile
, PERCENTILE(latency, 0.75) AS 75Percentile
, PERCENTILE(latency, 0.80) AS 80Percentile
, PERCENTILE(latency, 0.90) AS 90Percentile
, PERCENTILE(latency, 0.95) AS 95Percentile
, PERCENTILE(latency, 0.99) AS 99Percentile
FROM
UserLatency
;