I'm attempting to join to tables and count how many "Checklists" are completed.
What you'll notice with id: 1
, is that:
01-09
: It was marked completed 01-10
: It was marked uncompleted 01-11
: It was marked completed againBecause of this my count is off by 1. I'm looking to only choose the last completed action per id
. The actual response should be:
| Worksite | Count |
| ---------- | ----- |
| worksite_1 | 4 |
| worksite_2 | 2 |
EDIT
I figured out how to do it if I wanted to separate the count by date. But I haven't figured out how I would do it if I wanted a TOTAL. This seems to work for by date:
-- Completed checklists per worksite and per calendar day of the completion event (MySQL).
-- A checklist that is completed on several different days is counted once on each of those days.
SELECT
    DATE(ChecklistCompletions.ts) AS `DATE`,
    Checklists.worksite_id AS `Worksite`,
    -- DISTINCT is taken over the checklist id rather than over a constant, so several
    -- checklists completed on the same day are all counted, while a checklist toggled
    -- to completed more than once on the same day is still counted only once.
    COUNT(DISTINCT CASE WHEN ChecklistCompletions.completed = 1 THEN Checklists.id END) AS `Count`
FROM Checklists
LEFT JOIN ChecklistCompletions
    ON Checklists.id = ChecklistCompletions.id
GROUP BY `Worksite`, `DATE`
ORDER BY `DATE` DESC
Is this something that is possible to do? Any help would be greatly appreciated.
NOTE: I'm only using MySQL
as a playground. I'm looking for a solution in BigQuery Standard SQL
.
Schema (MySQL v5.7)
-- One row per checklist; worksite_id names the worksite the checklist belongs to.
CREATE TABLE Checklists (
    `id`          VARCHAR(55),
    `uid`         VARCHAR(55),
    `worksite_id` VARCHAR(55),
    `ts`          DATETIME
);

-- One row per completion event; `id` is joined to Checklists.id by the queries
-- and may repeat when a checklist is toggled several times.
CREATE TABLE ChecklistCompletions (
    `id`        VARCHAR(55),
    `uid`       VARCHAR(55),
    `completed` TINYINT(1),
    `ts`        DATETIME
);
-- Sample completion events (completed: 1 = marked completed, 0 = marked uncompleted).
-- Checklist '1' has three events: completed, uncompleted, then completed again.
-- String literals use single quotes throughout: double-quoted strings are non-standard
-- and are parsed as identifiers when MySQL runs with the ANSI_QUOTES SQL mode.
INSERT INTO ChecklistCompletions
    (`id`, `uid`, `completed`, `ts`)
VALUES
    ('1', 'u12345', 1, '2019-01-09 00:00:00'),
    ('1', 'u12345', 0, '2019-01-10 00:00:00'),
    ('1', 'u12345', 1, '2019-01-11 00:00:00'),
    ('2', 'u12345', 0, '2019-01-13 00:00:00'),
    ('3', 'u12345', 1, '2019-01-12 00:00:00'),
    ('4', 'u12345', 1, '2019-01-13 00:00:00'),
    ('5', 'u12345', 1, '2019-01-12 00:00:00'),
    ('6', 'u12345', 0, '2019-01-17 00:00:00'),
    ('7', 'u1', 1, '2019-01-10 00:00:00'),
    ('8', 'u1', 0, '2019-01-12 00:00:00'),
    ('9', 'u1', 1, '2019-01-15 00:05:00'),
    ('10', 'u1', 0, '2019-01-15 00:00:00')
;
-- Sample checklists: ten checklists spread over worksite_1 and worksite_2.
-- String literals use single quotes throughout: double-quoted strings are non-standard
-- and are parsed as identifiers when MySQL runs with the ANSI_QUOTES SQL mode.
INSERT INTO Checklists
    (`id`, `uid`, `worksite_id`, `ts`)
VALUES
    ('1', 'u12345', 'worksite_1', '2019-01-09 00:00:00'),
    ('2', 'u12345', 'worksite_2', '2019-01-13 00:00:00'),
    ('3', 'u12345', 'worksite_2', '2019-01-12 00:00:00'),
    ('4', 'u12345', 'worksite_1', '2019-01-13 00:00:00'),
    ('5', 'u12345', 'worksite_2', '2019-01-12 00:00:00'),
    ('6', 'u12345', 'worksite_1', '2019-01-17 00:00:00'),
    ('7', 'u1', 'worksite_1', '2019-01-10 00:00:00'),
    ('8', 'u1', 'worksite_1', '2019-01-12 00:00:00'),
    ('9', 'u1', 'worksite_1', '2019-01-15 00:05:00'),
    ('10', 'u1', 'worksite_2', '2019-01-15 00:00:00')
;
Query #1
-- Naive total per worksite (MySQL): every completion row with completed = 1 is counted,
-- so a checklist that was marked completed more than once contributes more than once.
SELECT
    cl.worksite_id AS `Worksite`,
    COUNT(CASE cc.completed WHEN 1 THEN 1 END) AS `Count`
FROM Checklists AS cl
LEFT JOIN ChecklistCompletions AS cc
    ON cc.id = cl.id
GROUP BY cl.worksite_id;
| Worksite | Count |
| ---------- | ----- |
| worksite_1 | 5 |
| worksite_2 | 2 |
答案 0 :(得分:1)
以下是用于BigQuery标准SQL
#standardSQL
-- Per worksite, count the checklists whose most recent completion event is "completed".
SELECT Worksite, COUNTIF(completed = 1) AS completed
FROM (
  -- One row per checklist, carrying only the flag of its latest completion event.
  -- The events are ordered by their timestamp, not by the flag itself: ordering by
  -- the flag would pick the maximum value, so a checklist that was completed and
  -- later un-completed would still be counted.
  -- NOTE(review): events sharing the exact same ts for one checklist have no defined
  -- order here; add a tie-breaker if that can happen in the real data.
  SELECT
    Checklists.worksite_id AS `Worksite`,
    ARRAY_AGG(
      ChecklistCompletions.completed
      ORDER BY ChecklistCompletions.ts DESC
      LIMIT 1
    )[OFFSET(0)] AS completed
  FROM `project.dataset.Checklists` AS Checklists
  LEFT JOIN `project.dataset.ChecklistCompletions` AS ChecklistCompletions
    ON Checklists.id = ChecklistCompletions.id
  GROUP BY Checklists.id, Worksite
)
-- COUNTIF only counts rows where the condition is TRUE, so a NULL flag is not counted.
GROUP BY Worksite
如果将其应用于问题的样本数据,您将得到结果(如预期)
Row Worksite completed
1 worksite_1 4
2 worksite_2 2
您可以使用下面的
进行测试,操作#standardSQL
-- Self-contained check: the two CTEs stand in for the real tables using the question's sample data.
WITH `project.dataset.ChecklistCompletions` AS (
  SELECT "1" id, "u12345" uid, 1 completed, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
  SELECT "1", "u12345", 0, '2019-01-10 00:00:00' UNION ALL
  SELECT "1", "u12345", 1, '2019-01-11 00:00:00' UNION ALL
  SELECT "2", "u12345", 0, '2019-01-13 00:00:00' UNION ALL
  SELECT "3", "u12345", 1, '2019-01-12 00:00:00' UNION ALL
  SELECT "4", "u12345", 1, '2019-01-13 00:00:00' UNION ALL
  SELECT "5", "u12345", 1, '2019-01-12 00:00:00' UNION ALL
  SELECT "6", "u12345", 0, '2019-01-17 00:00:00' UNION ALL
  SELECT "7", "u1", 1, '2019-01-10 00:00:00' UNION ALL
  SELECT "8", "u1", 0, '2019-01-12 00:00:00' UNION ALL
  SELECT "9", "u1", 1, '2019-01-15 00:05:00' UNION ALL
  SELECT "10", "u1", 0, '2019-01-15 00:00:00'
), `project.dataset.Checklists` AS (
  SELECT "1" id, "u12345" uid, "worksite_1" worksite_id, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
  SELECT "2", "u12345", "worksite_2", '2019-01-13 00:00:00' UNION ALL
  SELECT "3", "u12345", "worksite_2", '2019-01-12 00:00:00' UNION ALL
  SELECT "4", "u12345", "worksite_1", '2019-01-13 00:00:00' UNION ALL
  SELECT "5", "u12345", "worksite_2", '2019-01-12 00:00:00' UNION ALL
  SELECT "6", "u12345", "worksite_1", '2019-01-17 00:00:00' UNION ALL
  SELECT "7", "u1", "worksite_1", '2019-01-10 00:00:00' UNION ALL
  SELECT "8", "u1", "worksite_1", '2019-01-12 00:00:00' UNION ALL
  SELECT "9", "u1", "worksite_1", '2019-01-15 00:05:00' UNION ALL
  SELECT "10", "u1", "worksite_2", '2019-01-15 00:00:00'
)
-- Per worksite, count the checklists whose most recent completion event is "completed".
SELECT Worksite, COUNTIF(completed = 1) AS completed
FROM (
  -- One row per checklist, carrying only the flag of its latest completion event.
  -- The events are ordered by their timestamp, not by the flag itself: ordering by
  -- the flag would pick the maximum value, so a checklist that was completed and
  -- later un-completed would still be counted.
  SELECT
    Checklists.worksite_id AS `Worksite`,
    ARRAY_AGG(
      ChecklistCompletions.completed
      ORDER BY ChecklistCompletions.ts DESC
      LIMIT 1
    )[OFFSET(0)] AS completed
  FROM `project.dataset.Checklists` AS Checklists
  LEFT JOIN `project.dataset.ChecklistCompletions` AS ChecklistCompletions
    ON Checklists.id = ChecklistCompletions.id
  GROUP BY Checklists.id, Worksite
)
GROUP BY Worksite
-- Sorted so the output is deterministic.
ORDER BY Worksite