我使用BigQuery报告Google Analytics数据。我尝试使用BigQuery重新创建着陆页数据。
以下查询报告的会话数比Google Analytics界面少18%:
-- Asker's attempt: lists the distinct (fullVisitorId, visitID, page path)
-- combinations taken from hits that are numbered 1 and typed 'PAGE',
-- reading only the ga_sessions table whose suffix is 20170331.
SELECT DISTINCT
fullVisitorId,
visitID,
h.page.pagePath AS LandingPage
FROM
-- The comma before UNNEST(hits) flattens the repeated hits field: one row per hit.
`project-name.dataset.ga_sessions_*`, UNNEST(hits) AS h
WHERE
-- NOTE(review): hitNumber is unqualified; presumably it resolves to h.hitNumber.
-- A session whose hit number 1 is not a 'PAGE' hit produces no row at all here,
-- which may be one source of the undercount described in the question -- confirm.
hitNumber = 1
AND h.type = 'PAGE'
-- Both bounds are the same suffix, so exactly one daily table is scanned.
AND _TABLE_SUFFIX BETWEEN '20170331' AND '20170331'
ORDER BY fullVisitorId DESC
我的方法哪里出错了?为什么我得到的结果无法与GA界面报告的数字相差在很小的百分比之内?
答案 0 :(得分:2)
多种原因:
1. 与GA着陆页报告等效的BigQuery查询:
#legacySQL
# Landing-page report written in BigQuery legacy SQL (it relies on
# TABLE_DATE_RANGE and WITHIN RECORD, which exist only in that dialect).
# Output: one row per landing page with the session count, bounce rate as a
# percentage of sessions, average pageviews and average time on site.
SELECT
  LandingPage,
  COUNT(sessionId) AS Sessions,
  100 * SUM(totals.bounces) / COUNT(sessionId) AS BounceRate,
  AVG(totals.pageviews) AS AvgPageviews,
  SUM(totals.timeOnSite) / COUNT(sessionId) AS AvgTimeOnSite
FROM (
  # Keep, per session, only the hit whose number equals the session's lowest
  # PAGE hit number; its page path is the landing page.
  SELECT
    CONCAT(fullVisitorId, STRING(visitId)) AS sessionID,
    totals.bounces,
    totals.pageviews,
    totals.timeOnSite,
    hits.page.pagePath AS landingPage
  FROM (
    # Flattened PAGE hits, each carrying the lowest qualifying hit number of
    # its own session record (MIN ... WITHIN RECORD).
    SELECT
      fullVisitorId,
      visitId,
      hits.page.pagePath,
      totals.bounces,
      totals.pageviews,
      totals.timeOnSite,
      MIN(hits.hitNumber) WITHIN RECORD AS firstHit,
      hits.hitNumber AS hitNumber
    FROM (TABLE_DATE_RANGE([XXXYYYZZZ.ga_sessions_], TIMESTAMP('2016-08-01'), TIMESTAMP('2016-08-31')))
    WHERE
      hits.type = 'PAGE'
      # Restored comparison: the operator was missing from this predicate,
      # leaving a syntax error. Hits with an empty page path are excluded.
      AND hits.page.pagePath != '')
  WHERE
    hitNumber = firstHit)
GROUP BY
  LandingPage
ORDER BY
  Sessions DESC,
  LandingPage
其次:
预先计算的数据——预聚合表
这些是Google为了加快用户界面的响应速度而预先计算的数据,称为预聚合表。Google没有具体说明预计算在何时进行,它可能发生在任何时间点。
因此,如果您将GA界面中的数字与BigQuery的输出进行比较,您总会看到一些差异。请放心以BigQuery中的数据为准。
答案 1 :(得分:1)
只需将以下内容添加到您的select语句中,您就可以实现相同的目的:
-- Select-list fragment: page path of the hit whose hit number is the lowest
-- among the session's 'PAGE' hits.
, (
    SELECT h.page.pagePath
    FROM UNNEST(hits) AS h
    WHERE h.hitNumber = (
        SELECT MIN(p.hitNumber)
        FROM UNNEST(hits) AS p
        WHERE p.type = 'PAGE'
    )
) AS landingpage
当我运行如下所示的内容时,我可以与GA UI进行一对一的匹配,这比原始答案更简洁:
-- Landing-page report over the daily tables with suffixes 20180801..20180808.
-- session_facts holds one distinct row per session (fullVisitorId combined
-- with visitStartTime); the final SELECT aggregates those rows per landing page.
WITH session_facts AS (
    SELECT DISTINCT
        CONCAT(CAST(fullVisitorId AS STRING), CAST(visitStartTime AS STRING)) AS sessionId,
        -- Page path of the hit carrying the lowest 'PAGE' hit number.
        (
            SELECT h.page.pagePath
            FROM UNNEST(hits) AS h
            WHERE h.hitNumber = (
                SELECT MIN(p.hitNumber)
                FROM UNNEST(hits) AS p
                WHERE p.type = 'PAGE'
            )
        ) AS landingpage,
        totals.bounces AS bounces,
        totals.timeonsite AS tos,
        -- Number of 'PAGE' hits in the session; averaged per landing page below.
        (
            SELECT COUNT(p.hitNumber)
            FROM UNNEST(hits) AS p
            WHERE p.type = 'PAGE'
        ) AS avg_pages
    FROM `tablename_*`
    WHERE _TABLE_SUFFIX >= '20180801'
        AND _TABLE_SUFFIX <= '20180808'
        AND totals.visits = 1
)
SELECT
    s.landingpage,
    COUNT(DISTINCT s.sessionId) AS sessions,
    SUM(s.bounces) AS bounces,
    AVG(s.avg_pages) AS avg_pages,
    -- Mean time on site per session divided by 60
    -- (presumably seconds converted to minutes -- confirm against the export schema).
    (SUM(s.tos) / COUNT(DISTINCT s.sessionId)) / 60 AS session_duration
FROM session_facts AS s
GROUP BY s.landingpage
答案 2 :(得分:0)
另一种方式!您可以得到相同的数字:
-- Sessions per landing page for yesterday's daily table.
-- session_landing tags every 'PAGE' hit row with the page path of the
-- lowest-numbered 'PAGE' hit in the same session (FIRST_VALUE over the session).
WITH session_landing AS (
    SELECT
        CONCAT(fullVisitorId, CAST(visitId AS STRING)) AS sessionID,
        FIRST_VALUE(hit.page.pagePath) OVER (
            PARTITION BY CONCAT(fullVisitorId, CAST(visitId AS STRING))
            ORDER BY hit.hitNumber ASC
        ) AS LandingPage
    FROM `xxxxxxxx1.ga_sessions_*`
    CROSS JOIN UNNEST(hits) AS hit
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
        AND hit.type = 'PAGE'
    GROUP BY
        fullVisitorId,
        visitId,
        sessionID,
        hit.page.pagePath,
        hit.hitNumber
)
SELECT
    LandingPage,
    -- DISTINCT collapses the one-row-per-hit output back to one count per session.
    COUNT(DISTINCT sessionID) AS sessions
FROM session_landing
GROUP BY LandingPage
ORDER BY sessions DESC
答案 3 :(得分:0)
导出模式(schema)中有一个 hits.isEntrance 字段可用于此目的。下面的示例将向您显示昨天的着陆页:
#standardSQL
# Yesterday's landing pages: visits, bounces and transactions summed per
# (date, page path) over the hits flagged as a session entrance.
SELECT
    date,
    hit.page.pagePath AS landingPage,
    SUM(totals.visits) AS visits,
    SUM(totals.bounces) AS bounces,
    SUM(totals.transactions) AS transactions
FROM `project.dataset.ga_sessions_*`
CROSS JOIN UNNEST(hits) AS hit
WHERE
    # Lower and upper bound were the same day, so this is a single-table match.
    _TABLE_SUFFIX = FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
    AND hit.isEntrance = TRUE
    AND totals.visits = 1 #avoid counting midnight-split sessions
GROUP BY
    date,
    landingPage
ORDER BY visits DESC
尽管如此,仍然存在一些差异来源,其中之一是没有着陆页的会话(如果您在GA的着陆页报告中查看,有时会看到一个 (not set) 值)。
要同时包括这些内容,您可以执行以下操作:
# Visits per landing page for yesterday, keeping sessions that have no
# entrance hit under the "(not set)" label.
WITH
# Sessions that do have an entrance hit, with that hit's page path.
landing_pages_set AS (
    SELECT
        CONCAT(CAST(fullVisitorId AS STRING), CAST(visitId AS STRING), CAST(date AS STRING)) AS fullVisitId,
        hit.page.pagePath AS virtualPagePath
    FROM `project.dataset.ga_sessions_*`
    CROSS JOIN UNNEST(hits) AS hit
    WHERE
        _TABLE_SUFFIX = FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
        AND totals.visits = 1 #avoid counting midnight-split sessions
        AND hit.isEntrance = TRUE
    GROUP BY
        fullVisitId,
        virtualPagePath
),
# Every session of the day, provisionally labelled "(not set)", together with
# its session-level metrics.
landing_pages_not_set AS (
    SELECT
        CONCAT(CAST(fullVisitorId AS STRING), CAST(visitId AS STRING), CAST(date AS STRING)) AS fullVisitId,
        date,
        "(not set)" AS virtualPagePath,
        COUNT(DISTINCT CONCAT(CAST(fullVisitorId AS STRING), CAST(visitId AS STRING), CAST(date AS STRING))) AS visits,
        SUM(totals.bounces) AS bounces,
        SUM(totals.transactions) AS transactions
    FROM `project.dataset.ga_sessions_*`
    WHERE
        _TABLE_SUFFIX = FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
        AND totals.visits = 1 #avoid counting midnight-split sessions
    GROUP BY
        fullVisitId,
        date,
        virtualPagePath
),
# All sessions; the real entrance page replaces "(not set)" wherever the
# left join finds one.
landing_pages AS (
    SELECT
        every_session.fullVisitId AS fullVisitId,
        every_session.date,
        COALESCE(entrance.virtualPagePath, every_session.virtualPagePath) AS virtualPagePath,
        every_session.visits,
        every_session.bounces,
        every_session.transactions
    FROM landing_pages_not_set AS every_session
    LEFT JOIN landing_pages_set AS entrance
        ON every_session.fullVisitId = entrance.fullVisitId
)
SELECT
    virtualPagePath,
    SUM(visits)
FROM landing_pages
GROUP BY virtualPagePath
# Position 2 is the unnamed SUM(visits) column.
ORDER BY 2 DESC