我有下面的数据集,看起来像这样。
t mean max min std data_id
4/14/2010 0:00 12.6941 12.6941 12.6941 12.6941 1
4/14/2010 0:00 12.3851 12.3851 12.3851 12.3851 2
4/14/2010 0:20 12.389 12.389 12.389 12.389 1
4/14/2010 0:20 12.1836 12.1836 12.1836 12.1836 2
4/14/2010 0:20 11.3887 11.3887 11.3887 11.3887 4
我想将数据转换为
t,str_agg
'2010-04-14 00:00:00','12.6941','12.6941','12.6941','12.6941','12.3851','12.3851','12.3851','12.3851',,,,
'2010-04-14 00:10:00','12.3890','12.3890','12.3890','12.3890','12.1836','12.1836','12.1836','12.1836','11.3887','11.3887','11.3887','11.3887
我尝试过以下查询: -
WITH dataset AS (
SELECT *
FROM
(
VALUES
('2010-04-14T00:00'::TIMESTAMP, 12.6941, 12.6941, 12.6941, 12.6941, 1),
('2010-04-14T00:00'::TIMESTAMP, 12.3851, 12.3851, 12.3851, 12.3851, 2),
('2010-04-14T00:20'::TIMESTAMP, 12.389, 12.389, 12.389, 12.389, 1),
('2010-04-14T00:20'::TIMESTAMP, 12.1836, 12.1836, 12.1836, 12.1836, 2),
('2010-04-14T00:20'::TIMESTAMP, 11.3887, 11.3887, 11.3887, 11.3887, 13)
) AS data(t, mean, max, min, std, data_id)
),
dataset_full AS (
SELECT
coalesce(t, time) AS t,
mean,
max,
min,
std,
data_id
FROM
generate_series(
(SELECT min(t) FROM dataset),
(SELECT max(t) FROM dataset),
'10 minutes')
AS times(time)
CROSS JOIN generate_series(
(SELECT min(data_id) FROM dataset),
(SELECT max(data_id) FROM dataset))
AS data_id(id)
LEFT JOIN dataset ON times.time = dataset.t AND data_id.id = dataset.data_id
)
SELECT
t,
string_agg(concat(mean, ',', max, ',', min, ',', std), ',')
FROM dataset_full
GROUP BY t
ORDER BY t;
我得到以下结果: -
'2010-04-14 00:00:00','12.6941,12.6941,12.6941,12.6941,12.3851,12.3851,12.3851,12.3851,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'
'2010-04-14 00:10:00',',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'
'2010-04-14 00:20:00','12.389,12.389,12.389,12.389,12.1836,12.1836,12.1836,12.1836,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.3887,11.3887,11.3887,11.3887'
但我想要以下结果: -
'2010-04-14 00:00:00','12.6941,12.6941,12.6941,12.6941,12.3851,12.3851,12.3851,12.3851,,,,'
'2010-04-14 00:20:00','12.389,12.389,12.389,12.389,12.1836,12.1836,12.1836,12.1836,11.3887,11.3887,11.3887,11.3887'
任何人都可以帮我解决上述问题!
答案 0 :(得分:1)
您的基本问题是您正在生成时间间隔和数据集ID,而不是从数据中读取它们。这会影响dataset_full
CTE。您似乎只想要数据中的某些值。
因此:
with dataset as (
select *
from (values ('2010-04-14T00:00'::TIMESTAMP, 12.6941, 12.6941, 12.6941, 12.6941, 1),
('2010-04-14T00:00'::TIMESTAMP, 12.3851, 12.3851, 12.3851, 12.3851, 2),
('2010-04-14T00:20'::TIMESTAMP, 12.389, 12.389, 12.389, 12.389, 1),
('2010-04-14T00:20'::TIMESTAMP, 12.1836, 12.1836, 12.1836, 12.1836, 2),
('2010-04-14T00:20'::TIMESTAMP, 11.3887, 11.3887, 11.3887, 11.3887, 13)
) AS data(t, mean, max, min, std, data_id)
),
dataset_full as (
select t.t, d.data_id,
ds.mean, ds.max, ds.min, ds.std
from (select distinct t from dataset) t cross join
(select distinct data_id from dataset) d left join
dataset ds
on ds.t = t.t and ds.data_id = d.data_id
)
select t,string_agg(concat(mean, ',', max, ',', min, ',', std), ',' order by data_id)
from dataset_full
group by t
order by t;
Here是SQL小提琴。
另请注意order by
中的string_agg()
。据推测,您希望这些值的顺序为dataset_id
。
答案 1 :(得分:0)
您可以将LEFT JOIN
替换为JOIN
:
WITH dataset AS (
SELECT *
FROM(VALUES
('2010-04-14T00:00'::TIMESTAMP, 12.6941, 12.6941, 12.6941, 12.6941, 1),
('2010-04-14T00:00'::TIMESTAMP, 12.3851, 12.3851, 12.3851, 12.3851, 2),
('2010-04-14T00:20'::TIMESTAMP, 12.389, 12.389, 12.389, 12.389, 1),
('2010-04-14T00:20'::TIMESTAMP, 12.1836, 12.1836, 12.1836, 12.1836, 2),
('2010-04-14T00:20'::TIMESTAMP, 11.3887, 11.3887, 11.3887, 11.3887, 13)
) AS data(t, mean, max, min, std, data_id)
),
dataset_full AS (
SELECT coalesce(t, time) AS t,
mean,
max,
min,
std,
data_id
FROM generate_series(
(SELECT min(t) FROM dataset),
(SELECT max(t) FROM dataset),
'10 minutes')
AS times(time)
CROSS JOIN generate_series(
(SELECT min(data_id) FROM dataset),
(SELECT max(data_id) FROM dataset))
AS data_id(id)
JOIN dataset -- here
ON times.time = dataset.t
AND data_id.id = dataset.data_id
)
SELECT t,string_agg(concat(mean, ',', max, ',', min, ',', std), ',')
FROM dataset_full
GROUP BY t
ORDER BY t;
<强> DBFiddle Demo 强>
修改强>
第一行中的,,,,被删除,我不想
...
cte2 AS (
SELECT t, string_agg(concat(mean, ',', max, ',', min, ',', std), ',') AS s
, COUNT(*) AS c
FROM dataset_full
GROUP BY t
)
SELECT t, s|| REPEAT(',,,,', (MAX(c) OVER() - c)::int)
FROM cte2
ORDER BY t;
<强> DBFiddle Demo2 强>
输出:
┌─────────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────┐
│ t │ result │
├─────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────┤
│ 2010-04-14 00:00:00 │ 12.6941,12.6941,12.6941,12.6941,12.3851,12.3851,12.3851,12.3851,,,, │
│ 2010-04-14 00:20:00 │ 12.389,12.389,12.389,12.389,12.1836,12.1836,12.1836,12.1836,11.3887,11.3887,11.3887,11.3887 │
└─────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────┘