我的目标是通过 repo_name 将 sample_contents 与 sample_commits 连接(JOIN)起来。
1)首先,我将 sample_contents 与 files 表连接(JOIN),使结果中包含 repo_name:
-- BigQuery legacy SQL.
-- Returns the lines of sampled '.java' files that satisfy the pattern below,
-- together with the blob id, the sample path/repo, and the repo_name obtained
-- by joining to `files` on the blob id.
SELECT
  line,
  a.id,
  sample_path,
  sample_repo_name,
  repo_name
FROM (
  -- Pass-through layer whose only job is to filter on the `line` alias
  -- produced by the query underneath it.
  SELECT *
  FROM (
    SELECT
      SPLIT(content, '\n') AS line,  -- split file content on newlines
      a.id,
      sample_path,
      sample_repo_name,
      repo_name                      -- presumably supplied by `files` (alias b)
    FROM (
      -- Project only the columns consumed above instead of SELECT *.
      SELECT id, content, sample_path, sample_repo_name
      FROM [bigquery-public-data:github_repos.sample_contents]
      WHERE sample_path LIKE '%.java'
    ) AS a
    INNER JOIN (
      -- Only the join key and the repo name are needed from `files`.
      SELECT id, repo_name
      FROM [bigquery-public-data:github_repos.files]
    ) AS b
      ON a.id = b.id
  )
  -- Pattern alternatives: starts with 'String', 'private int' or 'public',
  -- or contains a literal dot ('[.]').
  WHERE REGEXP_MATCH(line, '^String|^private int|^public|[.]')
)
2)接着,我编写了以下查询,期望通过 repo_name 获取任意给定文件的所有提交:
-- BigQuery legacy SQL.
-- Flags each matching '.java' source line with the keywords it contains and
-- pairs it with the commits in `sample_commits` that share its repo_name.
--
-- NOTE(review): the join key is repo_name only, so every line is paired with
-- every commit row of that repository, not just commits that touched
-- sample_path. Narrowing to a single file would also need a path condition
-- (sample_commits appears to expose changed paths under `difference` --
-- TODO confirm against the table schema).
-- NOTE(review): LIMIT without ORDER BY returns an arbitrary 100 rows.
SELECT
  (CASE WHEN line CONTAINS 'String' THEN 'String' ELSE '' END) AS column_1,
  (CASE WHEN line CONTAINS 'public' THEN 'public' ELSE '' END) AS column_2,
  line,
  a.id,
  sample_path,
  sample_repo_name,
  X.repo_name,
  -- Added: the commit identifier, so the join actually returns commit data.
  Y.commit AS commit
FROM (
  -- Pass-through layer whose only job is to filter on the `line` alias
  -- produced by the query underneath it.
  SELECT *
  FROM (
    SELECT
      SPLIT(content, '\n') AS line,  -- split file content on newlines
      a.id,
      sample_path,
      sample_repo_name,
      repo_name                      -- presumably supplied by `files` (alias b)
    FROM (
      -- Project only the columns consumed above instead of SELECT *.
      SELECT id, content, sample_path, sample_repo_name
      FROM [bigquery-public-data:github_repos.sample_contents]
      WHERE sample_path LIKE '%.java'
    ) AS a
    INNER JOIN (
      -- Only the join key and the repo name are needed from `files`.
      SELECT id, repo_name
      FROM [bigquery-public-data:github_repos.files]
    ) AS b
      ON a.id = b.id
  )
  -- Pattern alternatives: starts with 'String', 'private int' or 'public',
  -- or contains a literal dot ('[.]').
  WHERE REGEXP_MATCH(line, '^String|^private int|^public|[.]')
) AS X
INNER JOIN (
  -- Only the join key and the commit identifier are read from sample_commits;
  -- SELECT * would pull every other column of the table into the join.
  SELECT commit, repo_name
  FROM [bigquery-public-data:github_repos.sample_commits]
) AS Y
  ON X.repo_name = Y.repo_name
LIMIT 100
谢谢!