BigQuery查询-计数

时间:2019-12-16 01:00:00

标签: google-bigquery

我有一张如下所示的表,包含以下列:ID、员工名称、主管名称

需要再显示两列:CountD,表示直接向该员工汇报的员工人数;CountI,表示间接向其汇报的员工人数

快照共享enter image description here

我尝试过按主管名称分区计数,即 COUNT(*) OVER (PARTITION BY 主管名称),但没有解决问题

1 个答案:

答案 0 :(得分:2)

以下示例适用于BigQuery标准SQL

尝试以下脚本

#standardSQL
-- For every row of `input`, report how many employees name that employee as
-- their supervisor (direct_employees) and how many sit below that employee at
-- any depth of the hierarchy (all_employees).
--
-- Approach: `ttt` holds (supervisor, employee) pairs. It starts as the direct
-- pairs from `input` and is widened one hierarchy level per pass until a pass
-- adds nothing new, or until the pass counter trips the runaway guard.
DECLARE rows_count INT64 DEFAULT 0;      -- size of ttt before the current pass
DECLARE run_away_stop INT64 DEFAULT 0;   -- number of passes started so far
DECLARE expanded_count INT64 DEFAULT 0;  -- size of ttt after the current pass

-- Sample data; each row is one employee together with their supervisor.
CREATE TEMP TABLE input AS
SELECT
  id,
  employee,
  supervisor
FROM UNNEST(ARRAY<STRUCT<id INT64, employee STRING, supervisor STRING>>[
  (1, 'A', 'X'),
  (2, 'B', 'X'),
  (3, 'C', 'X'),
  (4, 'X', 'F'),
  (5, 'Y', 'F'),
  (6, 'F', 'G')
]);

-- Seed the closure with the direct supervisor -> employee pairs.
CREATE TEMP TABLE ttt AS
SELECT
  supervisor,
  employee
FROM input;

REPEAT
  SET rows_count = (SELECT COUNT(*) FROM ttt);
  SET run_away_stop = run_away_stop + 1;

  -- Keep every known pair and add the pairs reachable through one more level:
  -- if `boss` supervises someone who is already a supervisor in ttt, then
  -- `boss` is also above that person's employees. UNION DISTINCT drops pairs
  -- that are already present, so the row count only grows when something new
  -- is found.
  CREATE OR REPLACE TEMP TABLE ttt AS
  SELECT
    known.supervisor,
    known.employee
  FROM ttt AS known
  UNION DISTINCT
  SELECT
    boss.supervisor,
    known.employee
  FROM input AS boss
  INNER JOIN ttt AS known
    ON boss.employee = known.supervisor;

  SET expanded_count = (SELECT COUNT(*) FROM ttt);

  -- Stop when the pass added nothing, or when the guard trips. The counter is
  -- incremented before the check, so the guard allows at most 11 passes; if it
  -- trips first, ttt may still be missing deeper pairs.
UNTIL rows_count = expanded_count OR run_away_stop > 10
END REPEAT;

-- Final report: one row per input row, with 0 for employees who supervise
-- nobody.
WITH direct_reports AS (
  -- Number of input rows naming each supervisor.
  SELECT
    supervisor,
    COUNT(*) AS direct_employees
  FROM input
  GROUP BY supervisor
),
all_reports AS (
  -- Number of closure pairs per supervisor (direct and indirect together).
  SELECT
    supervisor,
    COUNT(*) AS all_employees
  FROM ttt
  GROUP BY supervisor
)
SELECT
  emp.*,
  COALESCE(direct_reports.direct_employees, 0) AS direct_employees,
  COALESCE(all_reports.all_employees, 0) AS all_employees
FROM input AS emp
LEFT JOIN direct_reports
  ON emp.employee = direct_reports.supervisor
LEFT JOIN all_reports
  ON emp.employee = all_reports.supervisor
ORDER BY emp.id;  -- id is the first output column

它返回所需的输出

enter image description here

要应用于您的真实表格

  1. 删除CREATE TEMP TABLE input AS ( ... )语句
  2. 将input替换为对您真实表的引用,例如your_project.your_dataset.your_table
  3. 另外,请注意run_away_stop > 10表达式-它是防止循环失控的保护条件:计数器超过10时循环即退出(也就是最多执行11次迭代)-您可以根据数据的层次结构深度来调整此数字

另一种做法是使用数组(ARRAY)而不是临时表,如下例所示

#standardSQL
-- Same report as the temp-table variant, but the (supervisor, employee)
-- closure is kept in the script variable `ttt` (an array of structs) instead
-- of a temp table. For every row of `input` it reports how many employees name
-- that employee as their supervisor (direct_employees) and how many sit below
-- that employee at any depth (all_employees).
DECLARE rows_count INT64 DEFAULT 0;     -- length of ttt before the current pass
DECLARE run_away_stop INT64 DEFAULT 0;  -- number of passes started so far
DECLARE ttt ARRAY<STRUCT<supervisor STRING, employee STRING>> DEFAULT [];

-- Sample data; each row is one employee together with their supervisor.
CREATE TEMP TABLE input AS
SELECT
  id,
  employee,
  supervisor
FROM UNNEST(ARRAY<STRUCT<id INT64, employee STRING, supervisor STRING>>[
  (1, 'A', 'X'),
  (2, 'B', 'X'),
  (3, 'C', 'X'),
  (4, 'X', 'F'),
  (5, 'Y', 'F'),
  (6, 'F', 'G')
]);

-- Seed the closure with the direct supervisor -> employee pairs.
SET ttt = ARRAY(
  SELECT AS STRUCT
    supervisor,
    employee
  FROM input
);

REPEAT
  SET rows_count = ARRAY_LENGTH(ttt);
  SET run_away_stop = run_away_stop + 1;

  -- Keep every known pair and add the pairs reachable through one more level:
  -- if `boss` supervises someone who is already a supervisor in ttt, then
  -- `boss` is also above that person's employees. UNION DISTINCT drops pairs
  -- that are already present, so the array only grows when something new is
  -- found.
  SET ttt = ARRAY(
    SELECT AS STRUCT
      closure.supervisor,
      closure.employee
    FROM (
      SELECT
        known.supervisor,
        known.employee
      FROM UNNEST(ttt) AS known
      UNION DISTINCT
      SELECT
        boss.supervisor,
        known.employee
      FROM input AS boss
      INNER JOIN UNNEST(ttt) AS known
        ON boss.employee = known.supervisor
    ) AS closure
  );

  -- Stop when the pass added nothing, or when the guard trips. The counter is
  -- incremented before the check, so the guard allows at most 11 passes; if it
  -- trips first, ttt may still be missing deeper pairs.
UNTIL rows_count = ARRAY_LENGTH(ttt) OR run_away_stop > 10
END REPEAT;

-- Final report: one row per input row, with 0 for employees who supervise
-- nobody.
WITH direct_reports AS (
  -- Number of input rows naming each supervisor.
  SELECT
    supervisor,
    COUNT(*) AS direct_employees
  FROM input
  GROUP BY supervisor
),
all_reports AS (
  -- Number of closure pairs per supervisor (direct and indirect together).
  SELECT
    supervisor,
    COUNT(*) AS all_employees
  FROM UNNEST(ttt)
  GROUP BY supervisor
)
SELECT
  emp.*,
  COALESCE(direct_reports.direct_employees, 0) AS direct_employees,
  COALESCE(all_reports.all_employees, 0) AS all_employees
FROM input AS emp
LEFT JOIN direct_reports
  ON emp.employee = direct_reports.supervisor
LEFT JOIN all_reports
  ON emp.employee = all_reports.supervisor
ORDER BY emp.id;  -- id is the first output column