我有一个包含三个字段的表:
Ref Alt INFO
A T SNP;FN1,DKFZp686O22169;DKFZp686O22169(uc002vez.2)///FN1(uc010zjp.1)///FN1(uc002vfa.2)///FN1(uc002vfb.2)///FN1(uc002vfc.2)///FN1(uc002vfd.2)///FN1(uc002vfe.2)///FN1(uc002vff.2)///FN1(uc002vfg.2)///FN1(uc002vfh.2)///FN1(uc002vfi.2)///FN1(uc002vfj.2)///FN1(uc010fvc.1)///FN1(uc010fvd.1);Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding;5UTR///Intron_8///Intron_32///Intron_31///Intron_31///Intron_32///Intron_31///Intron_31///Intron_31///Intron_31///Intron_32///Intron_32///Intron_2///Intron_2;.///.///.///.///.///.///.///.///.///.///.///.///.///.;.///.///.///.///.///.///.///.///.///.///.///.///.///.;A-0.9491,T-0.0509;A-970,T-52;A/A-0.9002,A/T-0.0978,T/T-0.0020;A/A-460,A/T-50,T/T-1,N/N-0
有没有办法利用前两个字段(Ref 和 Alt),从 INFO 字段中提取 A/T-0.0978?
谢谢!
答案 0 :(得分:2)
以下是用于 BigQuery 标准 SQL 的代码,它会提取给定组合的所有值(在您的具体示例中有两个):
[{"id": 0, "metadata": {"meta_1": "B", "meta_2": "Red"}, "value": 0.3169439789955154}, {"id": 1, "metadata": {"meta_1": "C", "meta_2": "Green"}, "value": 0.5672345948633107}, {"id": 2, "metadata": {"meta_1": "B", "meta_2": "Red"}, "value": 0.36909249143056766}, {"id": 3, "metadata": {"meta_1": "C", "meta_2": "Red"}, "value": 0.8033913639248945}, {"id": 4, "metadata": {"meta_1": "B", "meta_2": "Red"}, "value": 0.04500655943447107}, {"id": 5, "metadata": {"meta_1": "A", "meta_2": "Red"}, "value": 0.43388699497426875}, {"id": 6, "metadata": {"meta_1": "C", "meta_2": "Green"}, "value": 0.14265358049247878}, {"id": 7, "metadata": {"meta_1": "C", "meta_2": "Red"}, "value": 0.7823049064345722}, {"id": 8, "metadata": {"meta_1": "B", "meta_2": "Blue"}, "value": 0.9522025604707016}, {"id": 9, "metadata": {"meta_1": "C", "meta_2": "Red"}, "value": 0.3863207799791931}]
如果您针对问题中的数据运行它,结果将为:
#standardSQL
-- Extracts every "<Ref>/<Alt>..." entry from INFO as an array
-- (for Ref = 'A', Alt = 'T': entries such as A/T-0.0978 and A/T-50).
-- An entry must be preceded by a ',' or ';' delimiter. The captured value runs
-- up to (but does not consume) the next delimiter or the end of the string, so
-- an entry at the very end of INFO, or one directly following another match,
-- is no longer skipped.
-- NOTE(review): Ref and Alt are spliced into the pattern unescaped; this assumes
-- they contain no regex metacharacters (e.g. plain nucleotide letters) — confirm.
SELECT
  REGEXP_EXTRACT_ALL(INFO, CONCAT(r'[,;](', Ref, '/', Alt, r'[^,;]*)')) AS val
FROM `project.dataset.table`
有输出
#standardSQL
-- Self-contained demo: the CTE stands in for the real table with the single
-- sample row from the question, so the query can be run as-is.
WITH `project.dataset.table` AS (
  SELECT 'A' Ref, 'T' Alt, 'SNP;FN1,DKFZp686O22169;DKFZp686O22169(uc002vez.2)///FN1(uc010zjp.1)///FN1(uc002vfa.2)///FN1(uc002vfb.2)///FN1(uc002vfc.2)///FN1(uc002vfd.2)///FN1(uc002vfe.2)///FN1(uc002vff.2)///FN1(uc002vfg.2)///FN1(uc002vfh.2)///FN1(uc002vfi.2)///FN1(uc002vfj.2)///FN1(uc010fvc.1)///FN1(uc010fvd.1);Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding///Protein_Coding;5UTR///Intron_8///Intron_32///Intron_31///Intron_31///Intron_32///Intron_31///Intron_31///Intron_31///Intron_31///Intron_32///Intron_32///Intron_2///Intron_2;.///.///.///.///.///.///.///.///.///.///.///.///.///.;.///.///.///.///.///.///.///.///.///.///.///.///.///.;A-0.9491,T-0.0509;A-970,T-52;A/A-0.9002,A/T-0.0978,T/T-0.0020;A/A-460,A/T-50,T/T-1,N/N-0' INFO
)
-- Collect every "<Ref>/<Alt>..." entry that follows a ',' or ';' delimiter.
-- The capture stops at the next delimiter or at end of string without consuming
-- it, so trailing and back-to-back entries are both returned.
-- For the sample row this yields [A/T-0.0978, A/T-50].
SELECT
  REGEXP_EXTRACT_ALL(INFO, CONCAT(r'[,;](', Ref, '/', Alt, r'[^,;]*)')) AS val
FROM `project.dataset.table`
如果您只想要第一个值,可以使用:
Row val
1 A/T-0.0978
A/T-50
或
#standardSQL
-- Returns only the first "<Ref>/<Alt>..." entry found in INFO
-- (for Ref = 'A', Alt = 'T' on the question's sample row: A/T-0.0978).
-- SAFE_OFFSET(0) yields NULL when no entry matches, where OFFSET(0) would fail
-- the whole query with an array-index-out-of-bounds error.
-- The capture stops at the next ',' / ';' or at end of string, so an entry that
-- is the last item in INFO is still found.
SELECT
  REGEXP_EXTRACT_ALL(INFO, CONCAT(r'[,;](', Ref, '/', Alt, r'[^,;]*)'))[SAFE_OFFSET(0)] AS val
FROM `project.dataset.table`