我是 BigQuery 的新手。
inventory 表的粒度为 depot_id 和 product_id,而 inventorytransaction 表是一张日志表,记录了对 inventory 执行的每一次操作(增加或减少)。
我需要在查询 inventory 表的 SELECT 中增加额外的列,以获取当年每个月(1 月至 12 月)的数量总和,如下所示:
SELECT inventory.*, janTotalQuantity, febTotalQuantity, marTotalQuantity,...
我尝试的做法是将 inventory 表与一个子查询进行 LEFT JOIN,该子查询按仓库和产品计算当年每个月(例如 Jan-2019、Feb-2019、Mar-2019 等)的总数量。下面是执行此操作的 SQL 语句。
-- Attempted query from the question: inventory joined to product, depot and a
-- per-month aggregate of the execution year's inventorytransaction rows.
-- NOTE: the LEFT JOIN matches one agg_sd row per month, so each inventory
-- record is repeated once per matching month, and in each repeated row at
-- most that month's column is non-zero.
SELECT
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor AS distributor,
    p.product_category AS category,
    d.depot_name AS location,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost AS value,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date) AS last_update,
    inv.delete_status,
    IF(agg_sd.mon_year = "Jan-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS janQuantityTotal,
    IF(agg_sd.mon_year = "Feb-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS febQuantityTotal,
    IF(agg_sd.mon_year = "Mar-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS marQuantityTotal,
    IF(agg_sd.mon_year = "Apr-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS aprQuantityTotal,
    IF(agg_sd.mon_year = "May-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS mayQuantityTotal,
    IF(agg_sd.mon_year = "Jun-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS junQuantityTotal,
    IF(agg_sd.mon_year = "Jul-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS julQuantityTotal,
    IF(agg_sd.mon_year = "Aug-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS augQuantityTotal,
    IF(agg_sd.mon_year = "Sep-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS sepQuantityTotal,
    IF(agg_sd.mon_year = "Oct-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS octQuantityTotal,
    IF(agg_sd.mon_year = "Nov-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS novQuantityTotal,
    IF(agg_sd.mon_year = "Dec-{{ execution_date.year }}", agg_sd.totalQuantity, 0) AS decQuantityTotal
FROM iprocure_stage.inventory AS inv
INNER JOIN iprocure_ods.product AS p
    ON p.product_id = inv.product_id
INNER JOIN iprocure_ods.depot AS d
    ON d.depot_id = inv.depot_id
LEFT JOIN (
    -- One row per (month, product, depot) for the execution year.
    SELECT
        FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year,
        product_id,
        depot_id,
        SUM(quantity) AS totalQuantity
    FROM `iprocure_ods.inventorytransaction`
    WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
        AND transaction_type = 1
        AND reference_type IN (1, 6)
        AND delete_status = 0
    GROUP BY mon_year, product_id, depot_id
) AS agg_sd
    ON agg_sd.product_id = inv.product_id
    AND agg_sd.depot_id = inv.depot_id
上述查询的问题在于:对于同一仓库、同一产品,每个有总数量的月份都会使库存记录重复出现一行,如下所示:
----------------------------------------------------------------------------------
inventory_id depot_id product_id janTotalQuantity febTotalQuantity
-------------------------------------------------------------------------------------
123 2 3 56 0
123 2 3 0 65
如何避免 inventory 表的记录重复,并在 BigQuery 中添加每月总数量列?
答案 0 :(得分:2)
您可以按除各月总量之外的所有列进行分组(GROUP BY),并对这些月度总量应用 SUM 聚合函数。这样应该能把输出数据集展平为每条库存记录一行:
-- One row per inventory record, with the execution year's monthly quantity
-- totals pivoted into twelve columns (janQuantityTotal .. decQuantityTotal).
-- The LEFT JOIN yields one row per (inventory record, month); grouping by every
-- non-aggregated column and SUM-ing the per-month IF() collapses them into a
-- single row. Months with no matching transactions come out as 0.
SELECT
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor AS distributor,
    p.product_category AS category,
    d.depot_name AS location,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost AS value,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date) AS last_update,
    inv.delete_status,
    SUM(IF(agg_sd.mon_year = "Jan-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS janQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Feb-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS febQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Mar-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS marQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Apr-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS aprQuantityTotal,
    SUM(IF(agg_sd.mon_year = "May-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS mayQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Jun-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS junQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Jul-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS julQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Aug-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS augQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Sep-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS sepQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Oct-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS octQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Nov-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS novQuantityTotal,
    SUM(IF(agg_sd.mon_year = "Dec-{{ execution_date.year }}", agg_sd.totalQuantity, 0)) AS decQuantityTotal
FROM iprocure_stage.inventory inv
JOIN iprocure_ods.product p ON p.product_id = inv.product_id
JOIN iprocure_ods.depot d ON d.depot_id = inv.depot_id
LEFT JOIN (
    -- One row per (month, product, depot) for the execution year.
    SELECT
        FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year,
        product_id,
        depot_id,
        SUM(quantity) AS totalQuantity
    FROM `iprocure_ods.inventorytransaction`
    WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
        AND transaction_type = 1
        AND (reference_type = 1 OR reference_type = 6)
        AND delete_status = 0
    GROUP BY mon_year, product_id, depot_id
) AS agg_sd ON agg_sd.product_id = inv.product_id AND agg_sd.depot_id = inv.depot_id
-- GROUP BY takes expressions only: an "AS alias" here is a syntax error, so the
-- grouping list repeats the SELECT expressions without their aliases.
GROUP BY
    inv.inventory_id,
    p.product_name,
    p.product_type,
    p.product_distributor,
    p.product_category,
    d.depot_name,
    inv.quantity,
    inv.lower_limit,
    inv.unit_cost,
    inv.quantity * inv.unit_cost,
    p.product_id,
    d.depot_id,
    TIMESTAMP_SECONDS(inv.update_date),
    inv.delete_status
答案 1 :(得分:1)
您是在尝试模拟数据透视表(pivot),因此应当使用("伪")聚合函数:
-- One row per inventory record, with the execution year's monthly quantity
-- totals pivoted into twelve columns (janQuantityTotal .. decQuantityTotal).
-- agg_sd has at most one row per (month, product, depot), so MAX acts only as a
-- "pick the single matching value" aggregate. Non-matching rows contribute NULL
-- rather than 0 so that a negative monthly total is not masked by the zeros
-- from other months; COALESCE then turns "no row for this month" back into 0.
SELECT inv.inventory_id
     , p.product_name
     , p.product_type
     , p.product_distributor AS distributor
     , p.product_category AS category
     , d.depot_name AS location
     , inv.quantity
     , inv.lower_limit
     , inv.unit_cost
     , inv.quantity * inv.unit_cost AS value
     , p.product_id
     , d.depot_id
     , TIMESTAMP_SECONDS(inv.update_date) AS last_update
     , inv.delete_status
     , COALESCE(MAX(IF(agg_sd.mon_year = "Jan-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS janQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Feb-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS febQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Mar-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS marQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Apr-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS aprQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "May-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS mayQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Jun-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS junQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Jul-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS julQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Aug-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS augQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Sep-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS sepQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Oct-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS octQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Nov-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS novQuantityTotal
     , COALESCE(MAX(IF(agg_sd.mon_year = "Dec-{{ execution_date.year }}", agg_sd.totalQuantity, NULL)), 0) AS decQuantityTotal
FROM iprocure_stage.inventory inv
JOIN iprocure_ods.product p ON p.product_id = inv.product_id
JOIN iprocure_ods.depot d ON d.depot_id = inv.depot_id
LEFT JOIN (
    -- One row per (month, product, depot) for the execution year.
    SELECT FORMAT_TIMESTAMP('%b-%Y', transaction_date) AS mon_year
         , product_id
         , depot_id
         , SUM(quantity) AS totalQuantity
    FROM `iprocure_ods.inventorytransaction`
    WHERE EXTRACT(YEAR FROM transaction_date) = {{ execution_date.year }}
      AND transaction_type = 1
      AND (reference_type = 1 OR reference_type = 6)
      AND delete_status = 0
    GROUP BY mon_year, product_id, depot_id
) AS agg_sd ON agg_sd.product_id = inv.product_id AND agg_sd.depot_id = inv.depot_id
-- GROUP BY takes expressions only: an "AS alias" here is a syntax error, so the
-- grouping list repeats the SELECT expressions without their aliases.
GROUP BY inv.inventory_id
       , p.product_name
       , p.product_type
       , p.product_distributor
       , p.product_category
       , d.depot_name
       , inv.quantity
       , inv.lower_limit
       , inv.unit_cost
       , inv.quantity * inv.unit_cost
       , p.product_id
       , d.depot_id
       , TIMESTAMP_SECONDS(inv.update_date)
       , inv.delete_status