ID NAME DURATION START FINISH P1 P2 P3
1 A 14 1
2 B 15 1
3 C 15 1
4 D 12 1
5 E 22 2 3
6 F 14 4 1
7 G 9 5 6
需要开发递归大查询SQL来计算计划的开始和结束时间 请注意,活动的时间表可以有所不同 P1,P2和P3是行活动的前任ID 一项活动可能只有1名前任或多名前任 开始和结束列的计算如下 Start等于“2017-01-01”或(Max(其前身+1完成))的较大值 完成始终是开始+持续时间-1的函数 在达到答案之前,预计计算将是递归的。
我在bigquery中需要这个的原因是通过迭代更改持续时间值来执行计划的模拟。
上述附表的答案如下:
ID NAME DURATION START FINISH P1 P2 P3
1 A 14 1-Jan-17 14-Jan-17 1
2 B 15 15-Jan-17 29-Jan-17 1
3 C 15 15-Jan-17 29-Jan-17 1
4 D 12 15-Jan-17 26-Jan-17 1
5 E 22 30-Jan-17 20-Feb-17 2 3
6 F 14 27-Jan-17 9-Feb-17 4 1
7 G 9 21-Feb-17 1-Mar-17 5 6
答案 0 :(得分:1)
BigQuery不支持递归查询
因此,您需要使用您选择的client
来自行编排递归
下面演示如何通过运行一系列查询来实现此目的,直到所有依赖关系都得到解决
第1步:准备初始表 -
yourproject.yourdataset.yourtable
(此问题的简单示例)
第2步:使用与源表相同的目标表运行迭代查询 -
yourproject.yourdataset.yourtable
,具有写入首选项>>覆盖表
#standardSQL
SELECT a.id, a.name, a.duration, a.p1, a.p2, a.p3,
start,
DATE_ADD(start, INTERVAL a.duration - 1 DAY) finish
FROM `yourproject.yourdataset.yourtable` a
LEFT JOIN `yourproject.yourdataset.yourtable` a1 ON a.p1 = a1.id
LEFT JOIN `yourproject.yourdataset.yourtable` a2 ON a.p2 = a2.id
LEFT JOIN `yourproject.yourdataset.yourtable` a3 ON a.p3 = a3.id
CROSS JOIN UNNEST([
IF(a.id = a.p1, DATE '2017-01-01', ((
SELECT DATE_ADD(MAX(finish), INTERVAL 1 DAY)
FROM UNNEST([a1.finish, a2.finish, a3.finish]) finish
WHERE IF(a.p1 IS NULL, 0, 1) + IF(a.p2 IS NULL, 0, 1) + IF(a.p3 IS NULL, 0, 1) =
IF(a1.finish IS NULL, 0, 1) + IF(a2.finish IS NULL, 0, 1) + IF(a3.finish IS NULL, 0, 1)
)))
]) start
WHERE NOT a.id IS NULL
ORDER BY a.id
第3步:检查是否还有要计算的条目
#standardSQL
SELECT COUNT(1) still_to_iterate
FROM `yourproject.yourdataset.yourtable`
WHERE start IS NULL
如果此处的count大于0 - 继续执行步骤2,依此类推,直到still_to_iterate = 0
处理示例:
如果你手动执行这些步骤 - 下面是你得到的
当然,对于更实际的情况 - 迭代次数可能很高,即使仍然可以手动完成,也会很快变得不高效!
这就是你可以使用你选择的client
或bq command line
以及一些bash / awk /等魔法编写逻辑上面的逻辑
答案 1 :(得分:1)
我希望通过一次查询运行进行迭代。是否可以使用内联JS ......?
以下是BigQuery Standard SQL
它使用JS UDF
在一次运行中完成所有迭代
这需要将整个表的数据传递到UDF,因此肯定是UDF limits
/ limitations
的主题
我怀疑这个解决方案是否具有真正的实用价值,但是通过练习和BigQuery功能探索未来的
还要注意这里做的一些假设,以简化JS代码并关注问题的根源。所以假设是:所有id字段都是连续填充,没有从值1开始的间隙(如果你愿意,你可以改进这个:o))
TS_MINY=p1.y-( (tempL*10)>>15);// 15 pixels du bord
您可以使用下面的虚拟数据(来自您的问题)测试/播放上面的
#standardSQL
CREATE TEMPORARY FUNCTION y(arr ARRAY<STRING>)
RETURNS ARRAY<STRUCT<id INT64, name STRING, duration INT64, start INT64, finish INT64, p1 INT64, p2 INT64, p3 INT64>>
LANGUAGE js AS """
var result = [], fin;
for (i = 0; i < arr.length; i++){result.push(JSON.parse(arr[i]))}
for (w = 0; w < 40; w++){ fin = true;
for (i = 0; i < arr.length; i++) {
if(result[i].start == null) { fin = false;
var x1 = parseInt(result[i].p1) - 1;
var x2 = parseInt(result[i].p2||result[i].p1) - 1;
var x3 = parseInt(result[i].p3||result[i].p1) - 1;
if(result[i].id == result[i].p1) {
result[i].start = 1;
result[i].finish = 1 + result[i].duration - 1;
} else if (result[x1].start !== null && result[x2].start !== null && result[x3].start !== null) {
result[i].start = Math.max(result[x1].finish, result[x2].finish, result[x3].finish) + 1;
result[i].finish = result[i].start + result[i].duration - 1;
}
}
} if (fin) {return result}
} return result;
""";
SELECT
id, name, duration,
DATE_ADD(DATE '2017-01-01', INTERVAL start - 1 DAY) start,
DATE_ADD(DATE '2017-01-01', INTERVAL finish - 1 DAY) finish,
p1, p2, p3
FROM (
SELECT rec.* FROM (
SELECT ARRAY_AGG(TO_JSON_STRING(t) ORDER BY id) AS data
FROM `yourTable` t
), UNNEST(y(data)) AS rec
) ORDER BY id
结果为
#standardSQL
CREATE TEMPORARY FUNCTION y(arr ARRAY<STRING>)
RETURNS ARRAY<STRUCT<id INT64, name STRING, duration INT64, start INT64, finish INT64, p1 INT64, p2 INT64, p3 INT64>>
LANGUAGE js AS """
var result = [], fin;
for (i = 0; i < arr.length; i++){result.push(JSON.parse(arr[i]))}
for (w = 0; w < 40; w++){ fin = true;
for (i = 0; i < arr.length; i++) {
if(result[i].start == null) { fin = false;
var x1 = parseInt(result[i].p1) - 1;
var x2 = parseInt(result[i].p2||result[i].p1) - 1;
var x3 = parseInt(result[i].p3||result[i].p1) - 1;
if(result[i].id == result[i].p1) {
result[i].start = 1;
result[i].finish = 1 + result[i].duration - 1;
} else if (result[x1].start !== null && result[x2].start !== null && result[x3].start !== null) {
result[i].start = Math.max(result[x1].finish, result[x2].finish, result[x3].finish) + 1;
result[i].finish = result[i].start + result[i].duration - 1;
}
}
} if (fin) {return result}
} return result;
""";
WITH `yourTable` AS (SELECT * FROM (
SELECT NULL id, NULL name, NULL duration, 1 start, 1 finish, 1 p1, 1 p2, 1 p3 UNION ALL
SELECT 1, 'A', 14, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 2, 'B', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 3, 'C', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 4, 'D', 12, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 5, 'E', 22, NULL, NULL, 2, 3, NULL UNION ALL
SELECT 6, 'F', 14, NULL, NULL, 4, 1, NULL UNION ALL
SELECT 7, 'G', 9, NULL, NULL, 5, 6, NULL
) WHERE NOT id IS NULL
)
SELECT
id, name, duration,
DATE_ADD(DATE '2017-01-01', INTERVAL start - 1 DAY) start,
DATE_ADD(DATE '2017-01-01', INTERVAL finish - 1 DAY) finish,
p1, p2, p3
FROM (
SELECT rec.* FROM (
SELECT ARRAY_AGG(TO_JSON_STRING(t) ORDER BY id) AS data
FROM `yourTable` t
), UNNEST(y(data)) AS rec
) ORDER BY id
答案 2 :(得分:0)
#standardSQL
CREATE TEMPORARY FUNCTION y(arr ARRAY<STRING>)
RETURNS ARRAY<STRUCT<id INT64, name STRING, duration INT64, start INT64, finish INT64, p1 INT64, p2 INT64, p3 INT64>>
LANGUAGE js AS """
var result = [],
for (var i = 0; i < arr.length; i++){result.push(JSON.parse(arr[i]))}
----------
result[1].finish= new Date(new Date('01-Jan-17').getTime()- 1 * 86400000)
for(var i=1;i<arr.length;i++){
if (result[i].p1 === ''){result[i].p1 = result[1].id}
if (result[i].p2 === ''){result[i].p2 = result[1].id}
if (result[i].p3 === ''){result[i].p3 = result[1].id}
}
for(var i=1;i<arr.length;i++){
result[i].start= new Date(Math.max( new Date('01-Jan-17').getTime()
,Math.max(result[result[i][result[i].p1]].finish.getTime() + 1 * 86400000 ,
result[result[i][result[i].p2]].finish.getTime() + 1 * 86400000 ,
result[result[i][result[i].p3]].finish.getTime() + 1 * 86400000 )
)
)
result[i].finish= new Date( result[i].start.getTime() + result[i].duration * 86400000 - 1 * 86400000 )
}
return result;
""";
WITH `yourTable` AS (SELECT * FROM (
SELECT NULL id, NULL name, NULL duration, 1 start, 1 finish, 1 p1, 1 p2, 1 p3 UNION ALL
SELECT 1, 'A', 14, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 2, 'B', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 3, 'C', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 4, 'D', 12, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 5, 'E', 22, NULL, NULL, 2, 3, NULL UNION ALL
SELECT 6, 'F', 14, NULL, NULL, 4, 1, NULL UNION ALL
SELECT 7, 'G', 9, NULL, NULL, 5, 6, NULL
) WHERE NOT id IS NULL
)
SELECT
id, name, duration,
start,
finish,
p1, p2, p3
FROM (
SELECT rec.* FROM (
SELECT ARRAY_AGG(TO_JSON_STRING(t) ORDER BY id) AS data
FROM `yourTable` t
), UNNEST(y(data)) AS rec
) ORDER BY id
答案 3 :(得分:0)
Scripting和Stored Procedures的支持现已处于测试阶段(截至2019年10月)
您可以提交多个用分号分隔的语句,BigQuery现在可以运行它们。
我希望通过一次查询来进行迭代
因此,现在您可以将所需的逻辑实现为一个纯SQL脚本(不涉及JS UDF,并且不需要手动迭代),如下面的示例
DECLARE cnt INT64;
CREATE TEMP TABLE temp_table AS SELECT * FROM (
SELECT NULL id, NULL name, NULL duration, CURRENT_DATE() start, CURRENT_DATE() finish, 1 p1, 1 p2, 1 p3 UNION ALL
SELECT 1, 'A', 14, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 2, 'B', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 3, 'C', 15, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 4, 'D', 12, NULL, NULL, 1, NULL, NULL UNION ALL
SELECT 5, 'E', 22, NULL, NULL, 2, 3, NULL UNION ALL
SELECT 6, 'F', 14, NULL, NULL, 4, 1, NULL UNION ALL
SELECT 7, 'G', 9, NULL, NULL, 5, 6, NULL
) WHERE NOT id IS NULL;
LOOP
CREATE OR REPLACE TEMP TABLE temp_table AS
SELECT a.id, a.name, a.duration, a.p1, a.p2, a.p3,
start, DATE_ADD(start, INTERVAL a.duration - 1 DAY) finish
FROM temp_table a
LEFT JOIN temp_table a1 ON a.p1 = a1.id
LEFT JOIN temp_table a2 ON a.p2 = a2.id
LEFT JOIN temp_table a3 ON a.p3 = a3.id
CROSS JOIN UNNEST([
IF(a.id = a.p1, DATE '2017-01-01', ((
SELECT DATE_ADD(MAX(finish), INTERVAL 1 DAY)
FROM UNNEST([a1.finish, a2.finish, a3.finish]) finish
WHERE IF(a.p1 IS NULL, 0, 1) + IF(a.p2 IS NULL, 0, 1) + IF(a.p3 IS NULL, 0, 1) =
IF(a1.finish IS NULL, 0, 1) + IF(a2.finish IS NULL, 0, 1) + IF(a3.finish IS NULL, 0, 1)
)))
]) start
WHERE NOT a.id IS NULL;
SET cnt = (SELECT COUNT(1) FROM temp_table WHERE start IS NULL);
IF cnt = 0 THEN BREAK; END IF;
END LOOP;
SELECT * FROM temp_table ORDER BY id;
上面的脚本最终执行12个作业-一个父作业和11个子作业
,如果您要检查最终作业的结果-您将看到结果表