BigQuery:查询执行期间资源超限(Resources exceeded during query execution)

时间:2017-01-19 20:09:37

标签: google-bigquery

大家好:

我一直尝试在 BigQuery 上运行一些较大的分析查询,但对其中不少州,由于现有数据量太大,我总是遇到问题——这里涉及的是多年的数据。也许是我的查询写得不够好,但我需要的正是按特定分组计算汇总值。

我需要在查询中更改哪些内容以防止资源超出错误?

-- Original query from the question (BigQuery Legacy SQL).
-- For every (company, rating class, ZIP3, ZIP5) combination it reports quote
-- counts, appearance counts and pricing-position ratios/averages for rows whose
-- SEARCH_TIMESTAMP lies between 2016-01-01 and 2017-01-01 (both bounds inclusive).
-- The inner query computes every metric as a window function; the outer query
-- turns the counts into ratios and collapses identical rows with GROUP BY.
SELECT
  COMPANY_NAME,
  RATING_CLASS,
  COMPANY_KEY,
  -- State-level total.
  -- OVER () has no PARTITION BY, so the SUM runs across every row of this result.
  -- NOTE(review): each ZIP5 count is repeated on every row for that ZIP5, so this
  -- total presumably counts the same ZIP5 more than once — confirm intent.
  SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3 metrics: each *_COUNT is divided by the appearance count to give a ratio.
  ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5 metrics: same set of calculations as ZIP3, at ZIP5 granularity.
  ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
  -- Inner query: every metric is a window function, so its value is repeated on
  -- each row of its partition instead of being reduced to one row per group.
  SELECT
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    -- ZIP3: quote count is partitioned by ZIP3 alone; all other metrics are
    -- partitioned by (ZIP3, company_key, rating_class).
    ZIP3,
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
    SUM(CASE
        WHEN lowest = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
    -- NOTE(review): the non-discounted averages below read the discounted_*
    -- columns, so they are identical to the ZIP3_DISCOUNTED_AVG_* expressions
    -- further down — confirm whether ratio_to_min / rate_order / cent_diff
    -- were intended.
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
    SUM(CASE
        WHEN top5 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
    SUM(CASE
        WHEN top10 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
    SUM(CASE
        WHEN DISCOUNTED_lowest = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
    SUM(CASE
        WHEN DISCOUNTED_top5 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_top10 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5: same metrics, partitioned by ZIP5 and (ZIP5, company_key, rating_class).
    -- NOTE(review): the same discounted_* reuse applies to the ZIP5 averages.
    ZIP5,
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
    SUM(CASE
        WHEN lowest = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
    SUM(CASE
        WHEN top5 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
    SUM(CASE
        WHEN top10 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
    SUM(CASE
        WHEN DISCOUNTED_lowest = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
    SUM(CASE
        WHEN DISCOUNTED_top5 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_top10 = TRUE THEN 1
        ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    -- Both timestamp bounds are inclusive; two user_key values are excluded.
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  -- Groups on every raw column the window functions read.
  -- NOTE(review): presumably meant to drop exact duplicate detail rows before
  -- the window functions are evaluated — confirm.
  GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    LOWEST,
    RATIO_TO_MIN,
    RATE_ORDER,
    TOP5,
    TOP10,
    CENT_DIFF,
    DISCOUNTED_LOWEST,
    DISCOUNTED_RATIO_TO_MIN,
    DISCOUNTED_RATE_ORDER,
    DISCOUNTED_TOP5,
    DISCOUNTED_TOP10,
    DISCOUNTED_CENT_DIFF,
    LOGGING_KEY )
-- Outer GROUP BY lists every selected column except the state total, so rows
-- that carry identical values are collapsed into one.
GROUP BY
  COMPANY_NAME,
  COMPANY_KEY,
  RATING_CLASS,
  -- ZIP3 output columns
  ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK,
  ZIP3_TOP5,
  ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5 output columns
  ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK,
  ZIP5_TOP5,
  ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF

使用建议的修正更新了查询:

-- Pricing-position report per (company, rating class, ZIP3, ZIP5), written for
-- BigQuery Legacy SQL.
--
-- Structure: `main` holds the distinct key combinations that form the report
-- rows. Every metric family is pre-aggregated with GROUP BY in its own subquery
-- and LEFT JOINed back onto `main`, instead of being computed with window
-- functions over the detail rows.
--
-- Invariants to keep when editing:
--   * Every subquery applies the SAME WHERE filter as `main`; otherwise the
--     metrics would include searches that the report rows exclude.
--   * ZIP3_SUB / ZIP5_SUB group by exactly the columns they are joined on, so
--     each `main` row matches at most one metrics row (no join fan-out).
--   * Non-discounted averages read ratio_to_min / rate_order / cent_diff;
--     discounted averages read the discounted_* columns.
--
-- NOTE(review): the window starts at 06:00Z but ends at 05:00Z, and the upper
-- bound is inclusive — confirm the intended local-time window.
SELECT
  main.COMPANY_NAME AS COMPANY_NAME,
  main.COMPANY_KEY AS COMPANY_KEY,
  main.RATING_CLASS AS RATING_CLASS,
  state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3: each *_COUNT is divided by the appearance count to give a ratio.
  main.ZIP3 AS ZIP3,
  ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5: same calculations at ZIP5 granularity.
  main.ZIP5 AS ZIP5,
  ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
  -- main: one row per distinct (company, rating class, ZIP3, ZIP5, state).
  SELECT
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    STATE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    STATE
    ) AS main
LEFT JOIN (
  -- ZIP3_SUB: appearance counts and averages per (ZIP3, company, rating class).
  SELECT
    ZIP3,
    COMPANY_KEY,
    RATING_CLASS,
    COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
    AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
    SUM(CASE
        WHEN TOP5 = TRUE THEN 1
        ELSE 0 END) AS ZIP3_TOP5_COUNT,
    SUM(CASE
        WHEN LOWEST = TRUE THEN 1
        ELSE 0 END) AS ZIP3_LOWEST_COUNT,
    SUM(CASE
        WHEN TOP10 = TRUE THEN 1
        ELSE 0 END) AS ZIP3_TOP10_COUNT,
    AVG(rate_order) AS ZIP3_AVG_RANK,
    AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
    AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    SUM(CASE
        WHEN DISCOUNTED_TOP5 = TRUE THEN 1
        ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_LOWEST = TRUE THEN 1
        ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_TOP10 = TRUE THEN 1
        ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
    AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP3,
    COMPANY_KEY,
    RATING_CLASS
    ) AS ZIP3_SUB
ON
  main.ZIP3 = ZIP3_SUB.ZIP3
  AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
  AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
  -- ZIP3_COUNT: exact number of distinct quotes (logging_key) per ZIP3.
  SELECT
    ZIP3,
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP3 ) AS ZIP3_COUNT
ON
  main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
  -- ZIP5_SUB: appearance counts and averages per (ZIP5, company, rating class).
  SELECT
    ZIP5,
    COMPANY_KEY,
    RATING_CLASS,
    COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
    AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
    SUM(CASE
        WHEN TOP5 = TRUE THEN 1
        ELSE 0 END) AS ZIP5_TOP5_COUNT,
    SUM(CASE
        WHEN LOWEST = TRUE THEN 1
        ELSE 0 END) AS ZIP5_LOWEST_COUNT,
    SUM(CASE
        WHEN TOP10 = TRUE THEN 1
        ELSE 0 END) AS ZIP5_TOP10_COUNT,
    AVG(rate_order) AS ZIP5_AVG_RANK,
    AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
    AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    SUM(CASE
        WHEN DISCOUNTED_TOP5 = TRUE THEN 1
        ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_LOWEST = TRUE THEN 1
        ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    SUM(CASE
        WHEN DISCOUNTED_TOP10 = TRUE THEN 1
        ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
    AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP5,
    COMPANY_KEY,
    RATING_CLASS
    ) AS ZIP5_SUB
ON
  main.ZIP5 = ZIP5_SUB.ZIP5
  AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
  AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
  -- ZIP5_COUNT: exact number of distinct quotes (logging_key) per ZIP5.
  SELECT
    ZIP5,
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP5 ) AS ZIP5_COUNT
ON
  main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
  -- state_count: exact number of distinct quotes (logging_key) per state.
  SELECT
    STATE,
    EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    STATE ) AS state_count
ON
  main.STATE = state_count.STATE

说明:由于 BigQuery 是共享资源,它会估算查询所需的资源量并据此分配资源。把查询拆分为对同一张表的多个联接之后,计算就能获得比最初估算更多的资源来执行。关于查询为何会受到限制的更技术性的解释,请参阅 Jordan Tigani 在 Stack Overflow 上的相关回答。

1 个答案:

答案 0 :(得分:2)

我感觉下面的查询就是你需要的。我也可能弄错,因为这其实只是比较盲目地对你的逻辑做逆向推断,所以如果有错,请不要太苛责。我没有办法测试,但我觉得它实际运行起来开销应该足够小。

-- Join-based rewrite of the report (BigQuery Legacy SQL): one row per distinct
-- (company, rating class, ZIP3, ZIP5) in `main`, with each metric family
-- pre-aggregated by GROUP BY in its own subquery and LEFT JOINed back.
-- Every subquery repeats the same WHERE filter so all metrics cover the same rows.
-- Non-discounted averages read ratio_to_min / rate_order / cent_diff; the
-- discounted averages read the discounted_* columns.
SELECT
  main.COMPANY_NAME AS COMPANY_NAME,
  main.COMPANY_KEY AS COMPANY_KEY,
  main.RATING_CLASS AS RATING_CLASS,
  -- OVER () has no PARTITION BY, so the SUM runs across every row of this result.
  -- NOTE(review): each ZIP5 count is repeated on every row for that ZIP5, so this
  -- total presumably counts the same ZIP5 more than once — confirm intent.
  SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3: each *_COUNT is divided by the appearance count to give a ratio.
  main.ZIP3 AS ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5: same calculations at ZIP5 granularity.
  main.ZIP5 AS ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
  -- main: distinct key combinations that form the report rows.
  SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
  -- zip3_sub: appearance counts and averages per (ZIP3, company, rating class).
  SELECT
    ZIP3, company_key, rating_class,
    COUNT(*)                                                  AS ZIP3_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)            AS ZIP3_LOWEST_COUNT,
    AVG(ratio_to_min)                                         AS ZIP3_AVG_RATIO_TO_LOWEST,
    AVG(rate_order)                                           AS ZIP3_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)              AS ZIP3_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)             AS ZIP3_TOP10_COUNT,
    AVG(cent_diff)                                            AS ZIP3_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min)                              AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order)                                AS ZIP3_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)   AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)  AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff)                                 AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
  -- zip5_sub: appearance counts and averages per (ZIP5, company, rating class).
  SELECT
    ZIP5, company_key, rating_class,
    COUNT(*)                                                  AS ZIP5_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)            AS ZIP5_LOWEST_COUNT,
    AVG(ratio_to_min)                                         AS ZIP5_AVG_RATIO_TO_LOWEST,
    AVG(rate_order)                                           AS ZIP5_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)              AS ZIP5_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)             AS ZIP5_TOP10_COUNT,
    AVG(cent_diff)                                            AS ZIP5_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min)                              AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order)                                AS ZIP5_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END)   AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END)  AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff)                                 AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
  -- zip3_count: distinct quotes (logging_key) per ZIP3.
  SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
  -- zip5_count: distinct quotes (logging_key) per ZIP5.
  SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5

另外请注意:在 BigQuery Legacy SQL 中,COUNT(DISTINCT) 函数是概率性的——它给出的是统计近似值,不保证精确。
您可以改用 EXACT_COUNT_DISTINCT() 函数——它能给出精确的数值,但在后端的开销会更大一些。

此外,也可以用 BigQuery 标准 SQL 重写整个查询:在标准 SQL 中,COUNT(DISTINCT) 返回的是精确计数,而且 BigQuery 团队也建议使用标准 SQL。