我在 BigQuery 中使用以下查询来连接表,以获取下单访客的 visitNumber 及其流量来源:
QUERY:
-- BigQuery legacy SQL.
-- For every order (transactionId) found in the date range, list each visit of the
-- same visitor whose visitNumber does not exceed the visitNumber of the visit in
-- which the order was recorded, together with that visit's traffic-source fields
-- and the smallest / largest hit-time string seen for it.
SELECT
  visitor,
  transactionId AS OrderNumber,
  source,
  referralPath,
  medium,
  keyword,
  campaign,
  visitNumber,
  visitStartTime,
  conversionTime,
  visitLastClikcTime  -- (sic) spelling of the output column is kept unchanged
FROM (
  SELECT
    conversions.fullVisitorId AS visitor,
    conversions.transactionId AS transactionId,
    clickChanges.visitNumber AS visitNumber,
    conversions.visitNumber AS transactionVisitNumber,
    clickChanges.source AS source,
    clickChanges.referralPath AS referralPath,
    clickChanges.medium AS medium,
    clickChanges.keyword AS keyword,
    clickChanges.campaign AS campaign,
    conversionTime,
    -- NOTE(review): clickTime is a 'dd.mm.yyyy h:m' string without zero padding,
    -- so MIN/MAX compare it as text, not chronologically -- confirm this is intended.
    MIN(clickTime) AS visitStartTime,
    MAX(clickTime) AS visitLastClikcTime
  FROM (
    -- conversions: rows whose hit carries a transaction id, with the visit number
    -- of that visit and a display string for the time of the hit.
    SELECT
      fullVisitorId,
      hits.transaction.transactionId AS transactionId,
      visitNumber AS visitNumber,
      -- Assumes date is a YYYYMMDD string -- TODO confirm.
      -- NOTE(review): legacy SQL documents SUBSTR as 1-based; verify that index 0
      -- really yields the four-digit year.
      CONCAT(
        CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
        ' ',
        CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
      ) AS conversionTime
    FROM
      (TABLE_DATE_RANGE([54247008.ga_sessions_],
                        TIMESTAMP('2014-11-16'),
                        TIMESTAMP('2014-11-21')))
    WHERE hits.transaction.transactionId IS NOT NULL
    -- NOTE(review): ordering inside a subquery presumably has no effect on the
    -- joined result -- confirm before removing.
    ORDER BY fullVisitorId, visitNumber
  ) conversions
  JOIN EACH (
    -- clickChanges: distinct combinations of visitor, visit number, traffic-source
    -- fields and hit-time string (the GROUP BY removes duplicates).
    SELECT
      fullVisitorId,
      trafficSource.referralPath AS referralPath,
      trafficSource.medium AS medium,
      trafficSource.source AS source,
      visitNumber,
      CONCAT(
        CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
        ' ',
        CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
      ) AS clickTime,
      trafficSource.campaign AS campaign,
      trafficSource.keyword AS keyword
    FROM
      (TABLE_DATE_RANGE([54247008.ga_sessions_],
                        TIMESTAMP('2014-11-16'),
                        TIMESTAMP('2014-11-21')))
    GROUP BY
      fullVisitorId,
      referralPath,
      medium,
      source,
      visitNumber,
      clickTime,
      campaign,
      keyword
    ORDER BY fullVisitorId, visitNumber
  ) clickChanges
  ON conversions.fullVisitorId = clickChanges.fullVisitorId
  GROUP BY
    visitor,
    transactionId,
    visitNumber,
    transactionVisitNumber,
    source,
    referralPath,
    medium,
    keyword,
    campaign,
    conversionTime
  -- Keep only visits numbered at or before the visit that holds the transaction.
  HAVING visitNumber <= transactionVisitNumber
  ORDER BY transactionId, visitNumber
)
现在我希望针对每个 transactionId 得到一个 MAX(visitNumber) 聚合列,并显示与该最大 visitNumber 对应的那一行信息。为此,我做了一些研究,并把整个查询再 JOIN 了一次,
如下所示。这样做是有效的,但查询也变得非常冗长且重复:
SAME QUERY(略有修改):
-- BigQuery legacy SQL.
-- Keeps, for each transactionId, only the row(s) of the visit list whose
-- visitNumber equals the largest visitNumber found for that transaction.
-- The visit list is computed twice: once as `a` (full rows) and once inside `b`,
-- where it is reduced to MAX(visitNumber) per transactionId; the two are then
-- joined on transactionId and visit number.
SELECT
  a.visitor,
  a.transactionId AS OrderNumber,
  a.source,
  a.referralPath,
  a.medium,
  a.keyword,
  a.campaign,
  a.visitNumber AS max_visitnumber,
  a.visitStartTime,
  a.conversionTime,
  a.visitLastClikcTime  -- (sic) spelling of the output column is kept unchanged
FROM (
  -- a: every visit of the ordering visitor numbered at or before the order's visit.
  SELECT
    conversions.fullVisitorId AS visitor,
    conversions.transactionId AS transactionId,
    clickChanges.visitNumber AS visitNumber,
    conversions.visitNumber AS transactionVisitNumber,
    clickChanges.source AS source,
    clickChanges.referralPath AS referralPath,
    clickChanges.medium AS medium,
    clickChanges.keyword AS keyword,
    clickChanges.campaign AS campaign,
    conversionTime,
    -- NOTE(review): clickTime is a 'dd.mm.yyyy h:m' string without zero padding,
    -- so MIN/MAX compare it as text, not chronologically -- confirm this is intended.
    MIN(clickTime) AS visitStartTime,
    MAX(clickTime) AS visitLastClikcTime
  FROM (
    -- conversions: rows whose hit carries a transaction id.
    SELECT
      fullVisitorId,
      hits.transaction.transactionId AS transactionId,
      visitNumber AS visitNumber,
      -- Assumes date is a YYYYMMDD string -- TODO confirm.
      -- NOTE(review): legacy SQL documents SUBSTR as 1-based; verify that index 0
      -- really yields the four-digit year.
      CONCAT(
        CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
        ' ',
        CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
      ) AS conversionTime
    FROM
      (TABLE_DATE_RANGE([54247008.ga_sessions_],
                        TIMESTAMP('2014-11-16'),
                        TIMESTAMP('2014-11-21')))
    WHERE hits.transaction.transactionId IS NOT NULL
    ORDER BY fullVisitorId, visitNumber
  ) conversions
  JOIN EACH (
    -- clickChanges: distinct combinations of visitor, visit number, traffic-source
    -- fields and hit-time string (the GROUP BY removes duplicates).
    SELECT
      fullVisitorId,
      trafficSource.referralPath AS referralPath,
      trafficSource.medium AS medium,
      trafficSource.source AS source,
      visitNumber,
      CONCAT(
        CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
        ' ',
        CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
      ) AS clickTime,
      trafficSource.campaign AS campaign,
      trafficSource.keyword AS keyword
    FROM
      (TABLE_DATE_RANGE([54247008.ga_sessions_],
                        TIMESTAMP('2014-11-16'),
                        TIMESTAMP('2014-11-21')))
    GROUP BY
      fullVisitorId,
      referralPath,
      medium,
      source,
      visitNumber,
      clickTime,
      campaign,
      keyword
    ORDER BY fullVisitorId, visitNumber
  ) clickChanges
  ON conversions.fullVisitorId = clickChanges.fullVisitorId
  GROUP BY
    visitor,
    transactionId,
    visitNumber,
    transactionVisitNumber,
    source,
    referralPath,
    medium,
    keyword,
    campaign,
    conversionTime
  HAVING visitNumber <= transactionVisitNumber
  ORDER BY transactionId, visitNumber
) a
JOIN EACH (
  -- b: the largest qualifying visitNumber per transaction, derived from the same
  -- visit list as `a`.
  SELECT
    MAX(visitNumber) AS max_visitnumber,
    transactionId
  FROM (
    SELECT
      conversions.fullVisitorId AS visitor,
      conversions.transactionId AS transactionId,
      clickChanges.visitNumber AS visitNumber,
      conversions.visitNumber AS transactionVisitNumber,
      clickChanges.source AS source,
      clickChanges.referralPath AS referralPath,
      clickChanges.medium AS medium,
      clickChanges.keyword AS keyword,
      clickChanges.campaign AS campaign,
      conversionTime,
      MIN(clickTime) AS visitStartTime,
      MAX(clickTime) AS visitLastClikcTime
    FROM (
      SELECT
        fullVisitorId,
        hits.transaction.transactionId AS transactionId,
        visitNumber AS visitNumber,
        CONCAT(
          CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
          ' ',
          CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
        ) AS conversionTime
      FROM
        (TABLE_DATE_RANGE([54247008.ga_sessions_],
                          TIMESTAMP('2014-11-16'),
                          TIMESTAMP('2014-11-21')))
      WHERE hits.transaction.transactionId IS NOT NULL
      ORDER BY fullVisitorId, visitNumber
    ) conversions
    JOIN EACH (
      SELECT
        fullVisitorId,
        trafficSource.referralPath AS referralPath,
        trafficSource.medium AS medium,
        trafficSource.source AS source,
        visitNumber,
        CONCAT(
          CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)),
          ' ',
          CONCAT(STRING(hits.hour), ':', STRING(hits.minute))
        ) AS clickTime,
        trafficSource.campaign AS campaign,
        trafficSource.keyword AS keyword
      FROM
        (TABLE_DATE_RANGE([54247008.ga_sessions_],
                          TIMESTAMP('2014-11-16'),
                          TIMESTAMP('2014-11-21')))
      GROUP BY
        fullVisitorId,
        referralPath,
        medium,
        source,
        visitNumber,
        clickTime,
        campaign,
        keyword
      ORDER BY fullVisitorId, visitNumber
    ) clickChanges
    ON conversions.fullVisitorId = clickChanges.fullVisitorId
    GROUP BY
      visitor,
      transactionId,
      visitNumber,
      transactionVisitNumber,
      source,
      referralPath,
      medium,
      keyword,
      campaign,
      conversionTime
    HAVING visitNumber <= transactionVisitNumber
    ORDER BY transactionId, visitNumber
  )
  GROUP EACH BY transactionId
) b
-- Match each transaction's rows in `a` to its maximum visit number from `b`.
ON b.max_visitnumber = a.visitnumber
  AND b.transactionId = a.transactionId
ORDER BY max_visitnumber DESC
所以我把几乎相同的查询写了两遍,只改动了一小部分。有没有更简洁的方法来做到这一点并缩短查询?
我发现 StackOverflow 上有人建议使用 WITH ... AS
形式的公用表表达式(CTE),但它似乎不适用于 BigQuery。是否有其他办法可以提高查询效率?
答案 0 :(得分:3)
正如评论所说,视图就是您想要的:您可以在其他查询中重复使用的查询的别名。
https://cloud.google.com/bigquery/querying-data#views
请注意,视图[目前]与 TABLE_DATE_RANGE 不兼容,因此您需要重写查询/视图,显式引用各个表。