所有
我一直试图通过BigQuery获取一些较大的分析代码,但是我仍然遇到许多状态和存在的数据量的问题。我们正在谈论多年的数据。可能是我的查询不符合要求,但尝试根据特定组获取总和是我需要的。
我需要在查询中更改哪些内容以防止资源超出错误?
SELECT
COMPANY_NAME,
RATING_CLASS,
COMPANY_KEY,
-- State Info & Calculations
-- Over is used as a WINDOW function to SUM ALL results within the given query
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3 Info & Calculations
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 Info & Calculations
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3
ZIP3,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
ZIP5,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
LOWEST,
RATIO_TO_MIN,
RATE_ORDER,
TOP5,
TOP10,
CENT_DIFF,
DISCOUNTED_LOWEST,
DISCOUNTED_RATIO_TO_MIN,
DISCOUNTED_RATE_ORDER,
DISCOUNTED_TOP5,
DISCOUNTED_TOP10,
DISCOUNTED_CENT_DIFF,
LOGGING_KEY )
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3 General
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK,
ZIP3_TOP5,
ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 General
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK,
ZIP5_TOP5,
ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF
使用建议的修正更新了查询:
SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824",
"4872666167115776",
"6396348765044736",
"6139303562313728",
"4988973881491456")
AND portal_key NOT IN ("5878607637381120")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
) AS MAIN
LEFT JOIN (
SELECT
ZIP3,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP3_SUB
ON
main.ZIP3 = ZIP3_SUB.ZIP3
AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP3,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3 ) AS ZIP3_COUNT
ON
main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
SELECT
ZIP5,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP5_SUB
ON
main.ZIP5 = ZIP5_SUB.ZIP5
AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP5,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5 ) AS ZIP5_COUNT
ON
main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
SELECT
STATE,
EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
STATE ) AS STATE_COUNT
ON
main.STATE = STATE_COUNT.STATE
说明: 由于BigQuery是一个共享资源,BigQuery会近似查询所需的资源量并相应地分配资源。通过将查询拆分为同一个表上的多个联接,可以通过比最初预期更多的资源来执行计算。有关限制查询的原因的更多技术解释,请参阅Jordan Tigani关于堆栈溢出响应here。
答案 0 :(得分:2)
不知怎的,我觉得下面是你需要的。我可能是错的,因为它实际上只是相对盲目地尝试对你的逻辑进行逆向工程,因此如果我错了,不要严重判断 无法测试,但我觉得实际上成功会很便宜
SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
SELECT
ZIP3, company_key, rating_class,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
SELECT
ZIP5, company_key, rating_class,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5
另外,请注意:在BigQuery Legacy SQL中,COUNT(DISTINCT)函数是概率性的 - 给出统计近似值,并不保证是准确的。
您可以使用EXACT_COUNT_DISTINCT()功能 - 这个功能可以为您提供确切的数字,但在后端需要更贵一点
当COUNT(DISTINCT)产生精确计数时,可以为BigQuery Standard SQL重写整个查询,并且根据BigQuery Team建议使用标准SQL