SQL查询:按每行各自不同的日期,计算该日期前后的平均值

时间:2017-05-25 15:05:24

标签: sql google-bigquery

我想计算给定日期之前和之后的平均分数,其中每个用户都有各自需要使用的日期。

我有两个表,第一个表包含代理人姓名、分数和日期:

Name     Score   Date
----     -----   ----
Dan      81      10/1/2016
Brad     35      8/5/2016
Allison  92      6/3/2016
Cindy    95      8/12/2016
Dan      45      7/16/2016
Cindy    77      4/16/2016
Allison  59      3/22/2016
Brad     55      3/22/2016

第二个表包含代理人姓名和他们接受培训的日期:

Agent_name   Training_date
----------   ----------
Dan          8/28/2016
Brad         4/15/2016
Cindy        3/3/2016
Allison      5/1/2016

我想要的输出包括姓名、培训日期、培训前的平均分和培训后的平均分。理想情况下如下所示:

Agent_name   Training_date   Avg_pre_training   Avg_post_training
----------   -------------   ----------------   -----------------
Dan          8/28/2016       45                 81
Brad         4/15/2016       55                 35
Cindy        3/3/2016        0                  86
Allison      5/1/2016        59                 92

我始终写不出一个查询,能够考虑到每个人都有各自不同的培训日期。

4 个答案:

答案 0 :(得分:2)

以下是BigQuery Standard SQL

  
#standardSQL
-- For every (agent, training date) pair, report the rounded average score
-- on or before the training date and the rounded average score after it.
SELECT
  Agent_name,
  Training_date,
  -- AVG over an empty set is NULL; COALESCE reports 0 when an agent has no
  -- scores on that side of the training date.
  COALESCE(ROUND(AVG(CASE WHEN date <= Training_date THEN Score END)), 0) AS Avg_pre_training,
  COALESCE(ROUND(AVG(CASE WHEN date > Training_date THEN Score END)), 0) AS Avg_post_training
FROM (
  -- Both date columns are 'm/d/yyyy' strings; parse them once so the outer
  -- query compares real DATE values instead of text.
  SELECT
    Agent_name,
    Score,
    PARSE_DATE('%m/%d/%Y', date) AS date,
    PARSE_DATE('%m/%d/%Y', Training_date) AS Training_date
  FROM training
  INNER JOIN agents
    ON Name = Agent_name
)
GROUP BY Agent_name, Training_date
-- Explicit ordering keeps the result deterministic.
ORDER BY Agent_name, Training_date

您可以使用问题示例中的虚拟数据来试用此查询:
#standardSQL
-- Self-contained demo: inline sample data plus the pre/post-training
-- average query, so it can be run without any real tables.
WITH agents AS (
  -- Sample scores; dates are 'm/d/yyyy' strings.
  SELECT 'Dan' AS Name, 81 AS Score, '10/1/2016' AS date UNION ALL
  SELECT 'Brad', 35, '8/5/2016' UNION ALL
  SELECT 'Allison', 92, '6/3/2016' UNION ALL
  SELECT 'Cindy', 95, '8/12/2016' UNION ALL
  SELECT 'Dan', 45, '7/16/2016' UNION ALL
  SELECT 'Cindy', 77, '4/16/2016' UNION ALL
  SELECT 'Allison', 59, '3/22/2016' UNION ALL
  SELECT 'Brad', 55, '3/22/2016' UNION ALL
  SELECT 'Allison', 70, '6/25/2016'
),
training AS (
  -- Sample training dates; Allison has two trainings.
  SELECT 'Dan' AS Agent_name, '8/28/2016' AS Training_date UNION ALL
  SELECT 'Brad', '4/15/2016' UNION ALL
  SELECT 'Cindy', '3/3/2016' UNION ALL
  SELECT 'Allison', '5/1/2016' UNION ALL
  SELECT 'Allison', '6/28/2016'
)
SELECT
  Agent_name,
  Training_date,
  -- AVG over an empty set is NULL; COALESCE reports 0 when an agent has no
  -- scores on that side of the training date.
  COALESCE(ROUND(AVG(CASE WHEN date <= Training_date THEN Score END)), 0) AS Avg_pre_training,
  COALESCE(ROUND(AVG(CASE WHEN date > Training_date THEN Score END)), 0) AS Avg_post_training
FROM (
  -- Parse both string dates once so the outer query compares DATE values.
  SELECT
    Agent_name,
    Score,
    PARSE_DATE('%m/%d/%Y', date) AS date,
    PARSE_DATE('%m/%d/%Y', Training_date) AS Training_date
  FROM training
  INNER JOIN agents
    ON Name = Agent_name
)
GROUP BY Agent_name, Training_date
-- Explicit ordering keeps the result deterministic.
ORDER BY Agent_name, Training_date

注意:我添加了几行数据以使示例更通用,从而涵盖同一用户参加多次培训的情况。

答案 1 :(得分:0)

请参阅下面的答案:我用 WHERE 子句分别筛选培训前和培训后的分数,然后将两个临时表连接在一起得到结果集。

-- Session-scoped temp table holding one row per recorded score.
CREATE TABLE #SET1 (
    NAME    VARCHAR(20),
    SCORE   INT,
    [DATE]  DATE
);

-- Session-scoped temp table holding each agent's training date.
CREATE TABLE #TRAININGDATE (
    NAME          VARCHAR(20),
    TRAINING_DATE DATE
);

-- Load the sample scores.
-- Dates use the ISO 8601 'YYYY-MM-DD' form so they are read the same way
-- regardless of the session's DATEFORMAT / language setting.
INSERT INTO #SET1
    (NAME, SCORE, [DATE])
VALUES
    ('Dan',     81, '2016-10-01'),
    ('Brad',    35, '2016-08-05'),
    ('Allison', 92, '2016-06-03'),
    ('Cindy',   95, '2016-08-12'),
    ('Dan',     45, '2016-07-16'),
    ('Cindy',   77, '2016-04-16'),
    ('Allison', 59, '2016-03-22'),
    ('Brad',    55, '2016-03-22');

-- Load each agent's training date.
-- The column list is explicit so the insert does not depend on column order,
-- and names use the same casing as in #SET1 so a join on NAME also matches
-- under a case-sensitive collation.
INSERT INTO #TRAININGDATE
    (NAME, TRAINING_DATE)
VALUES
    ('Dan',     '2016-08-28'),
    ('Brad',    '2016-04-15'),
    ('Cindy',   '2016-03-03'),
    ('Allison', '2016-05-01');

-- Step 1: average score per agent strictly before the agent's training date.
-- SCORE is cast to DECIMAL because AVG over INT returns a truncated INT.
-- An inner join is used because the WHERE filter on B.TRAINING_DATE would
-- discard the unmatched rows of an outer join anyway.
-- Note: a score dated exactly on the training date falls in neither bucket.
SELECT AVG(CAST(A.SCORE AS DECIMAL(10, 2))) AS AVERAGE_SCORE_BEFORE, A.NAME
        INTO #TEMP_A
        FROM #SET1 AS A
        INNER JOIN #TRAININGDATE AS B
        ON A.NAME = B.NAME
        WHERE A.[DATE] < B.TRAINING_DATE
        GROUP BY A.NAME;

-- Step 2: average score per agent strictly after the agent's training date.
SELECT AVG(CAST(A.SCORE AS DECIMAL(10, 2))) AS AVERAGE_SCORE_AFTER_TRAINING, A.NAME
        INTO #TEMP_B
        FROM #SET1 AS A
        INNER JOIN #TRAININGDATE AS B
        ON A.NAME = B.NAME
        WHERE A.[DATE] > B.TRAINING_DATE
        GROUP BY A.NAME;

-- Step 3: combine both sides. FULL OUTER JOIN keeps agents that have scores
-- on only one side of their training date; the missing side is reported as 0.
SELECT COALESCE(POST_AVG.NAME, PRE_AVG.NAME) AS NAME,
       COALESCE(PRE_AVG.AVERAGE_SCORE_BEFORE, 0) AS AVERAGE_PRE_TRAINING,
       COALESCE(POST_AVG.AVERAGE_SCORE_AFTER_TRAINING, 0) AS AVERAGE_SCORE_AFTER_TRAINING
        FROM #TEMP_B AS POST_AVG
        FULL OUTER JOIN #TEMP_A AS PRE_AVG
        ON POST_AVG.NAME = PRE_AVG.NAME;

答案 2 :(得分:0)

您可以使用派生表来完成此任务:

-- One row per training record with the agent's average score before the
-- training date and on/after it.
-- The derived tables are LEFT JOINed so an agent with no scores on one side
-- of the training date is still returned; that side is reported as 0.
SELECT
    T.Agent_Name,
    T.Training_Date,
    COALESCE(Pre.Avg_Pre_Training, 0) AS Avg_Pre_Training,
    COALESCE(Post.Avg_Post_Training, 0) AS Avg_Post_Training
FROM Training AS T
LEFT JOIN (
    -- Average of scores dated strictly before the training date.
    SELECT TR.Agent_Name, AVG(S.Score) AS Avg_Pre_Training
    FROM Training AS TR
    INNER JOIN Scores AS S
        ON S.Name = TR.Agent_Name
    WHERE S.Date < TR.Training_Date
    GROUP BY TR.Agent_Name
) AS Pre
    ON Pre.Agent_Name = T.Agent_Name
LEFT JOIN (
    -- Average of scores dated on or after the training date.
    SELECT TR.Agent_Name, AVG(S.Score) AS Avg_Post_Training
    FROM Training AS TR
    INNER JOIN Scores AS S
        ON S.Name = TR.Agent_Name
    WHERE S.Date >= TR.Training_Date
    GROUP BY TR.Agent_Name
) AS Post
    ON Post.Agent_Name = T.Agent_Name

我不完全确定在 BigQuery 中别名的用法是否正确;这是 #legacySQL 风格的语法,因此可能需要做一些调整。

答案 3 :(得分:0)

这在 Standard SQL 中应该可行:

-- Average score per agent on/before and after the agent's training date,
-- using inline sample data. Dates are 'm/d/yyyy' strings.
with table1 as (
  -- sample scores
  select 'Dan' as agent_name, 81 as score, '10/1/2016' as date union all
  select 'Brad', 35, '8/5/2016' union all
  select 'Allison', 92, '6/3/2016' union all
  select 'Cindy', 95, '8/12/2016' union all
  select 'Dan', 45, '7/16/2016' union all
  select 'Cindy', 77, '4/16/2016' union all
  select 'Allison', 59, '3/22/2016' union all
  select 'Brad', 55, '3/22/2016'
),

table2 as (
  -- sample training dates
  select 'Dan' as agent_name, '8/28/2016' as train_date union all
  select 'Brad', '4/15/2016' union all
  select 'Cindy', '3/3/2016' union all
  select 'Allison', '5/1/2016'
),

-- pair every score with its agent's training date, parsing each date once
scored as (
  select
    t1.agent_name as name,
    t2.train_date as train_date,
    t1.score as score,
    parse_date('%m/%d/%Y', t1.date) as score_dt,
    parse_date('%m/%d/%Y', t2.train_date) as train_dt
  from table1 as t1
  inner join table2 as t2
    on t1.agent_name = t2.agent_name
)

select
  name,
  train_date,
  -- avg over an empty set is null; coalesce reports 0 when an agent has no
  -- scores on that side of the training date
  coalesce(avg(case when score_dt <= train_dt then score end), 0) as pre_score,
  coalesce(avg(case when score_dt > train_dt then score end), 0) as pos_score
from scored
group by name, train_date

强烈建议您在BigQuery中使用此版本。