我试图从httparchive har表中提取一些免费提供的信息。
麻烦的是,BigQuery在执行JSON_EXTRACT()函数时返回错误,尽管许多JSONPath验证器都能正确解析并执行同一个表达式(http://jsonpath.com/或jsonpath.curiousconcept.com/)。
这是我的查询:
-- BigQuery legacy SQL: pull the host and the Referer request header out of
-- each HAR payload, alongside the raw url and payload columns.
-- NOTE: the filter expression [?(...)] in the second path is what triggers
-- the reported "JSONPath parse error"; it is kept verbatim on purpose.
SELECT
  JSON_EXTRACT(payload, '$._host') AS host,
  JSON_EXTRACT(payload, "$.request.headers[?(@.name=='Referer')]") AS referer,
  url,
  payload
FROM [httparchive:har.2016_01_01_chrome_requests]
LIMIT 100
基于下面的示例负载JSON,预期的返回值应为:
"http://www.echosdunet.net/"
这是错误:
错误:JSONPath解析错误,位置:[?(@.name=='Referer')]
这是一个有效负载json:
{
"pageref": "page_1_0",
"startedDateTime": "2016-01-03T22:18:52.632+00:00",
"time": 452,
"request": {
"method": "GET",
"url": "http://disqus.com/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr",
"headersSize": 650,
"bodySize": -1,
"cookies": [],
"headers": [{
"name": "Host",
"value": "disqus.com"
}, {
"name": "Connection",
"value": "keep-alive"
}, {
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
}, {
"name": "Upgrade-Insecure-Requests",
"value": "1"
}, {
"name": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36 PTST/254"
}, {
"name": "Referer",
"value": "http://www.echosdunet.net/"
}, {
"name": "Accept-Encoding",
"value": "gzip, deflate, sdch"
}, {
"name": "Accept-Language",
"value": "en-US,en;q=0.8"
}],
"httpVersion": "1.1",
"queryString": [{
"name": "base",
"value": "default"
}, {
"name": "version",
"value": "f3e1717b71e7256da258d3a504e56865"
}, {
"name": "f",
"value": "echosdunet"
}, {
"name": "t_i",
"value": "node/19849"
}, {
"name": "t_u",
"value": "http://www.echosdunet.net/node/19849"
}, {
"name": "t_e",
"value": "Accueil"
}, {
"name": "t_d",
"value": "Comparatif et test ADSL et fibre"
}, {
"name": "t_t",
"value": "Accueil"
}, {
"name": "s_o",
"value": "default"
}, {
"name": "l",
"value": "fr"
}]
},
"response": {
"status": 200,
"statusText": "",
"headersSize": 1161,
"bodySize": 2017,
"headers": [{
"name": "Server",
"value": "nginx"
}, {
"name": "Content-Type",
"value": "text/html; charset=utf-8"
}, {
"name": "Content-Security-Policy",
"value": "script-src https://*.twitter.com:* https://api.adsnative.com/v1/ad.json *.adsafeprotected.com https://cas.criteo.com/delivery/0.1/napi.jsonp *.services.disqus.com:* http://referrer.disqus.com/juggler/ disqus.com http://*.twitter.com:* a.disquscdn.com https://referrer.disqus.com/juggler/ https://*.services.disqus.com:* *.moatads.com 'unsafe-eval' https://mobile.adnxs.com/mob https://ssl.google-analytics.com"
}, {
"name": "Link",
"value": "<http://a.disquscdn.com>;rel=preconnect,<http://a.disquscdn.com>;rel=dns-prefetch"
}, {
"name": "Cache-Control",
"value": "stale-if-error=3600, s-stalewhilerevalidate=3600, stale-while-revalidate=30, no-cache, must-revalidate, public, s-maxage=5"
}, {
"name": "p3p",
"value": "CP=\"DSP IDC CUR ADM DELi STP NAV COM UNI INT PHY DEM\""
}, {
"name": "Timing-Allow-Origin",
"value": "*"
}, {
"name": "X-Content-Type-Options",
"value": "nosniff"
}, {
"name": "X-XSS-Protection",
"value": "1; mode=block"
}, {
"name": "Last-Modified",
"value": "Thu, 11 Jun 2015 13:30:36 GMT"
}, {
"name": "ETag",
"value": "W/\"lounge:view:3840102421.d93d9c4bc037078ffc811833ae267a6f.0\""
}, {
"name": "Content-Encoding",
"value": "gzip"
}, {
"name": "Content-Length",
"value": "2017"
}, {
"name": "Accept-Ranges",
"value": "bytes"
}, {
"name": "Date",
"value": "Sun, 03 Jan 2016 22:18:51 GMT"
}, {
"name": "Age",
"value": "0"
}, {
"name": "Connection",
"value": "keep-alive"
}, {
"name": "Vary",
"value": "Accept-Encoding"
}],
"httpVersion": "1.1",
"redirectURL": "",
"content": {
"size": 2017,
"mimeType": "text/html"
},
"cookies": []
},
"cache": {},
"timings": {
"blocked": -1,
"dns": 202,
"connect": 32,
"ssl": -1,
"send": 0,
"wait": 108,
"receive": 110
},
"_ip_addr": "104.156.81.134",
"_method": "GET",
"_host": "disqus.com",
"_url": "/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr",
"_responseCode": "200",
"_load_ms": "218",
"_ttfb_ms": "108",
"_load_start": "2632",
"_bytesOut": "652",
"_bytesIn": "3180",
"_objectSize": "2017",
"_cacheControl": "stale-if-error=3600, s-stalewhilerevalidate=3600, stale-while-revalidate=30, no-cache, must-revalidate, public, s-maxage=5",
"_contentType": "text/html",
"_contentEncoding": "gzip",
"_type": "3",
"_socket": "153",
"_score_cache": "-1",
"_score_cdn": "-1",
"_score_gzip": "100",
"_score_cookies": "-1",
"_score_keep-alive": "100",
"_score_minify": "-1",
"_score_combine": "-1",
"_score_compress": "-1",
"_score_etags": "-1",
"_is_secure": "0",
"_dns_ms": 202,
"_connect_ms": 32,
"_ssl_ms": "-1",
"_gzip_total": "3180",
"_gzip_save": "0",
"_minify_total": "0",
"_minify_save": "0",
"_image_total": "0",
"_image_save": "0",
"_cache_time": "-1",
"_dns_start": "2395",
"_dns_end": "2597",
"_connect_start": "2600",
"_connect_end": "2632",
"_ssl_start": "0",
"_ssl_end": "0",
"_initiator": "http://echosdunet.disqus.com/embed.js?_=1451859532217",
"_initiator_line": "16",
"_initiator_column": "8205",
"_server_count": "4",
"_server_rtt": "32",
"_client_port": "62284",
"_jpeg_scan_count": "0",
"_full_url": "http://disqus.com/embed/comments/?base=default&version=f3e1717b71e7256da258d3a504e56865&f=echosdunet&t_i=node%2F19849&t_u=http%3A%2F%2Fwww.echosdunet.net%2Fnode%2F19849&t_e=Accueil&t_d=Comparatif%20et%20test%20ADSL%20et%20fibre&t_t=Accueil&s_o=default&l=fr",
"_score_progressive_jpeg": -1,
"_body": true,
"_load_end": 2850,
"_ttfb_start": "2632",
"_ttfb_end": 2740,
"_download_start": 2740,
"_download_end": 2850,
"_download_ms": 110,
"_all_start": "2395",
"_all_end": 2850,
"_all_ms": 452,
"_index": 55,
"_number": 56,
"_body_url": "/response_body.php?test=160101_10_KZQ1&run=1&cached=0&request=56"
}
在没有referer jsonPath的情况下运行查询可以正常工作:
-- BigQuery legacy SQL: same extraction as the failing query but without the
-- Referer filter path; only the top-level "_host" field is read.
SELECT
  JSON_EXTRACT(payload, '$._host') AS host,
  url,
  payload
FROM [httparchive:har.2016_01_01_chrome_requests]
LIMIT 100
答案 0 :(得分:1)
JSON_EXTRACT_SCALAR和JSON_EXTRACT不支持完整的JSONPath元素集。您可以在documentation中阅读更多内容。我建议使用standard SQL,因为其中JSON函数的实现提供了更好的错误消息。
如果您对其他类型的JSONPath元素感兴趣,可以考虑submitting a feature request。
答案 1 :(得分:1)
同时,请尝试以下(针对BigQuery Standard SQL)
#standardSQL
-- CUSTOM_JSON_EXTRACT(json, key)
--   json: JSON text expected to hold an array of {"name": ..., "value": ...}
--         objects (for example the output of
--         JSON_EXTRACT(payload, '$.request.headers')).
--   key:  the "name" to look for; the comparison is exact and case-sensitive.
-- Returns the "value" of the first element whose "name" equals key, or NULL
-- when json is NULL, is not valid JSON, is not an array, or has no match.
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, key STRING)
RETURNS STRING
LANGUAGE js AS """
  // Best effort by design: unparsable input yields NULL instead of failing
  // the whole query.
  var entries;
  try {
    entries = JSON.parse(json);
  } catch (e) {
    return null;
  }
  if (!Array.isArray(entries)) {
    return null;
  }
  // The loop index is declared locally so it does not leak as a global.
  for (var i = 0; i < entries.length; i++) {
    var entry = entries[i];
    // Skip null/primitive elements rather than aborting the scan, so a
    // later matching element is still found.
    if (entry !== null && typeof entry === 'object' && entry.name === key) {
      return entry.value;
    }
  }
  return null;
""";
-- BigQuery standard SQL: the host is read straight from the payload, while
-- the Referer header is looked up by name in the request headers array via
-- the CUSTOM_JSON_EXTRACT temporary function.
SELECT
  JSON_EXTRACT(payload, '$._host') AS host,
  CUSTOM_JSON_EXTRACT(
    JSON_EXTRACT(payload, '$.request.headers'),
    'Referer'
  ) AS referer,
  url,
  payload
FROM `httparchive.har.2016_01_01_chrome_requests`
LIMIT 100