我想计算每个用户在某个给定日期之前和之后的平均分数,其中每个用户都有各自要使用的日期。
我有两张表,第一张表包含代理人姓名、分数和日期:
Name Score Date
---- ----- ----
Dan 81 10/1/2016
Brad 35 8/5/2016
Allison 92 6/3/2016
Cindy 95 8/12/2016
Dan 45 7/16/2016
Cindy 77 4/16/2016
Allison 59 3/22/2016
Brad 55 3/22/2016
第二张表包含代理人姓名以及他们接受培训的日期:
Agent_name Training_date
---------- ----------
Dan 8/28/2016
Brad 4/15/2016
Cindy 3/3/2016
Allison 5/1/2016
我正在寻找的是一个输出,其中包括姓名,培训日期,培训前的平均值和培训后的平均值。理想情况下看起来像这样
Agent_name Training_date Avg_pre_training Avg_post_training
---------- ------------- ---------------- -----------------
Dan 8/28/2016 45 81
Brad 4/15/2016 55 35
Cindy 3/3/2016 0 86
Allison 5/1/2016 59 92
我始终写不出这样一个查询:让每个人都按照各自的培训日期来计算。
答案 0 :(得分:2)
以下是 BigQuery Standard SQL 的写法:
#standardSQL
-- Per agent and training date: rounded average score on or before the
-- training date (pre) and strictly after it (post).
-- PARSE_DATE is applied because both date columns hold 'm/d/yyyy' strings;
-- comparing the raw strings would order them lexically, not chronologically.
SELECT
  Agent_name,
  Training_date,
  -- AVG over zero matching scores is NULL; report 0 instead, as the desired
  -- output does for an agent with no scores on one side of the training date.
  COALESCE(ROUND(AVG(CASE WHEN date <= Training_date THEN Score END)), 0) AS Avg_pre_training,
  COALESCE(ROUND(AVG(CASE WHEN date > Training_date THEN Score END)), 0) AS Avg_post_training
FROM (
  SELECT
    Agent_name,
    Score,
    PARSE_DATE('%m/%d/%Y', date) AS date,
    PARSE_DATE('%m/%d/%Y', Training_date) AS Training_date
  FROM training
  -- LEFT JOIN keeps agents that have a training date but no scores at all.
  LEFT JOIN agents
    ON Name = Agent_name
)
GROUP BY Agent_name, Training_date
-- Explicit ordering makes the result deterministic.
ORDER BY Agent_name, Training_date
您可以使用问题示例中的虚拟数据来试运行此查询:
#standardSQL
-- Sample scores: one row per (agent, score, score date as 'm/d/yyyy' text).
WITH agents AS (
  SELECT 'Dan' AS Name, 81 AS Score, '10/1/2016' AS date UNION ALL
  SELECT 'Brad', 35, '8/5/2016' UNION ALL
  SELECT 'Allison', 92, '6/3/2016' UNION ALL
  SELECT 'Cindy', 95, '8/12/2016' UNION ALL
  SELECT 'Dan', 45, '7/16/2016' UNION ALL
  SELECT 'Cindy', 77, '4/16/2016' UNION ALL
  SELECT 'Allison', 59, '3/22/2016' UNION ALL
  SELECT 'Brad', 55, '3/22/2016' UNION ALL
  SELECT 'Allison', 70, '6/25/2016'
),
-- Sample trainings; Allison appears twice to exercise multiple trainings
-- for the same agent.
training AS (
  SELECT 'Dan' AS Agent_name, '8/28/2016' AS Training_date UNION ALL
  SELECT 'Brad', '4/15/2016' UNION ALL
  SELECT 'Cindy', '3/3/2016' UNION ALL
  SELECT 'Allison', '5/1/2016' UNION ALL
  SELECT 'Allison', '6/28/2016'
)
-- Per agent and training date: rounded average score on or before the
-- training date (pre) and strictly after it (post).
SELECT
  Agent_name,
  Training_date,
  -- AVG over zero matching scores is NULL (e.g. Cindy before 3/3/2016,
  -- Allison after 6/28/2016); report 0 instead.
  COALESCE(ROUND(AVG(CASE WHEN date <= Training_date THEN Score END)), 0) AS Avg_pre_training,
  COALESCE(ROUND(AVG(CASE WHEN date > Training_date THEN Score END)), 0) AS Avg_post_training
FROM (
  SELECT
    Agent_name,
    Score,
    -- Parse the text dates so the comparisons above are chronological.
    PARSE_DATE('%m/%d/%Y', date) AS date,
    PARSE_DATE('%m/%d/%Y', Training_date) AS Training_date
  FROM training
  -- LEFT JOIN keeps agents that have a training date but no scores at all.
  LEFT JOIN agents
    ON Name = Agent_name
)
GROUP BY Agent_name, Training_date
-- Explicit ordering makes the result deterministic.
ORDER BY Agent_name, Training_date
注意:我额外添加了几行数据,使示例更具一般性,以涵盖同一用户参加多次培训的情况。
答案 1 :(得分:0)
请参阅下面的答案:我用 WHERE 子句分别筛选出培训前和培训后的分数,存入两个临时表,然后将它们连接起来得到结果集。
-- Sample scores: one row per scored evaluation.
CREATE TABLE #SET1
(
    NAME VARCHAR(20),
    SCORE INT,
    [DATE] DATE
);

-- Sample trainings: one training date per agent.
CREATE TABLE #TRAININGDATE
(
    NAME VARCHAR(20),
    TRAINING_DATE DATE
);

-- NOTE(review): the 'm/d/yyyy' literals below are converted using the session's
-- date format; this assumes an mdy setting -- confirm for your server.
INSERT INTO #SET1
    (NAME, SCORE, [DATE])
VALUES
    ('Dan', 81, '10/1/2016'),
    ('Brad', 35, '8/5/2016'),
    ('Allison', 92, '6/3/2016'),
    ('Cindy', 95, '8/12/2016'),
    ('Dan', 45, '7/16/2016'),
    ('Cindy', 77, '4/16/2016'),
    ('Allison', 59, '3/22/2016'),
    ('Brad', 55, '3/22/2016');

-- Names use the same casing as #SET1 so the joins below also match under a
-- case-sensitive collation.
INSERT INTO #TRAININGDATE
    (NAME, TRAINING_DATE)
VALUES
    ('Dan', '8/28/2016'),
    ('Brad', '4/15/2016'),
    ('Cindy', '3/3/2016'),
    ('Allison', '5/1/2016');

-- Average score strictly before each agent's training date.
-- SCORE is INT, so AVG returns an INT (the fractional part is truncated).
-- A score dated exactly on the training date falls in neither bucket.
SELECT AVG(S.SCORE) AS AVERAGE_SCORE_BEFORE, S.NAME
INTO #TEMP_A
FROM #SET1 AS S
INNER JOIN #TRAININGDATE AS T
    ON S.NAME = T.NAME
WHERE S.[DATE] < T.TRAINING_DATE
GROUP BY S.NAME;

-- Average score strictly after each agent's training date.
SELECT AVG(S.SCORE) AS AVERAGE_SCORE_AFTER_TRAINING, S.NAME
INTO #TEMP_B
FROM #SET1 AS S
INNER JOIN #TRAININGDATE AS T
    ON S.NAME = T.NAME
WHERE S.[DATE] > T.TRAINING_DATE
GROUP BY S.NAME;

-- Final result, driven from the training table so every trained agent is
-- listed even when it has no scores before and/or after the training date;
-- a missing average is reported as 0.
SELECT
    T.NAME,
    T.TRAINING_DATE,
    COALESCE(PRE.AVERAGE_SCORE_BEFORE, 0) AS AVERAGE_PRE_TRAINING,
    COALESCE(POST.AVERAGE_SCORE_AFTER_TRAINING, 0) AS AVERAGE_SCORE_AFTER_TRAINING
FROM #TRAININGDATE AS T
LEFT JOIN #TEMP_A AS PRE
    ON PRE.NAME = T.NAME
LEFT JOIN #TEMP_B AS POST
    ON POST.NAME = T.NAME
ORDER BY T.NAME;
答案 2 :(得分:0)
您可以使用派生表来完成此任务:
-- One row per agent: training date plus the average score before it (pre)
-- and on or after it (post).
-- NOTE(review): assumes Scores.Date and Training.Training_Date are DATE-typed;
-- if they are 'm/d/yyyy' strings they must be parsed before comparing -- confirm.
SELECT
    T.Agent_Name,
    T.Training_Date,
    -- No scores on one side of the training date yields NULL from the outer
    -- join; report 0 instead, as in the desired output.
    COALESCE(Pre.Avg_Pre_Training, 0) AS Avg_Pre_Training,
    COALESCE(Post.Avg_Post_Training, 0) AS Avg_Post_Training
FROM Training AS T
-- LEFT JOIN: with an inner join, an agent lacking pre-training scores
-- (Cindy in the sample data) would be dropped from the result entirely.
LEFT JOIN (
    SELECT Tr.Agent_Name, AVG(S.Score) AS Avg_Pre_Training
    FROM Training AS Tr
    INNER JOIN Scores AS S
        ON S.Name = Tr.Agent_Name
    WHERE S.Date < Tr.Training_Date
    GROUP BY Tr.Agent_Name
) AS Pre
    ON Pre.Agent_Name = T.Agent_Name
-- LEFT JOIN for the same reason: keep agents with no post-training scores.
LEFT JOIN (
    SELECT Tr.Agent_Name, AVG(S.Score) AS Avg_Post_Training
    FROM Training AS Tr
    INNER JOIN Scores AS S
        ON S.Name = Tr.Agent_Name
    WHERE S.Date >= Tr.Training_Date
    GROUP BY Tr.Agent_Name
) AS Post
    ON Post.Agent_Name = T.Agent_Name
我不完全确定这里的别名写法在 BigQuery 中是否正确;这是 #legacySQL
语法,因此可能需要做一些调整。
答案 3 :(得分:0)
这个问题可以用 Standard SQL 这样解决:
-- Sample scores: one row per (agent, score, score date as 'm/d/yyyy' text).
with table1 as (
  select 'Dan' as agent_name, 81 as score, '10/1/2016' as date union all
  select 'Brad', 35, '8/5/2016' union all
  select 'Allison', 92, '6/3/2016' union all
  select 'Cindy', 95, '8/12/2016' union all
  select 'Dan', 45, '7/16/2016' union all
  select 'Cindy', 77, '4/16/2016' union all
  select 'Allison', 59, '3/22/2016' union all
  select 'Brad', 55, '3/22/2016'
),
-- Sample trainings: one training date per agent.
table2 as (
  select 'Dan' as agent_name, '8/28/2016' as train_date union all
  select 'Brad', '4/15/2016' union all
  select 'Cindy', '3/3/2016' union all
  select 'Allison', '5/1/2016'
),
-- Pair every training with the agent's scores and parse both text dates once,
-- instead of re-parsing them inside each CASE branch.
scored as (
  select
    t2.agent_name as name,
    t2.train_date as train_date,
    t1.score as score,
    parse_date("%m/%d/%Y", t1.date) as score_dt,
    parse_date("%m/%d/%Y", t2.train_date) as train_dt
  from table2 as t2
  -- left join keeps agents that have a training date but no scores at all
  left join table1 as t1
    on t1.agent_name = t2.agent_name
)
select
  name,
  train_date,
  -- avg over zero matching scores is null (Cindy has none before her
  -- training); report 0 instead, as in the desired output
  coalesce(avg(case when score_dt <= train_dt then score end), 0) as pre_score,
  coalesce(avg(case when score_dt > train_dt then score end), 0) as pos_score
from scored
group by name, train_date
-- explicit ordering makes the result deterministic
order by name, train_date
强烈建议您在 BigQuery 中使用 Standard SQL 这一版本。