我有一个字典和两个列表:
# Lookup lists used to rank the rows; list1 is consulted before list2.
list1 = ["J", "C", "U", "N"]
list2 = ["K", "G", "E", "Q", "V"]

# Toy input: every key maps to the candidate rows for that key.
dictionary = {
    "key1": [
        "key1 stuff C_stuff stuff",
        "key1 stuff J_stuff stuff",
        "key1 stuff K stuff",
        "key1 stuff E,V stuff",
    ],
    "key2": [
        "key2 stuff N_stuff stuff",
        "key2 stuff Q stuff",
        "key2 stuff K,G,V stuff",
        "key2 stuff U_stuff stuff",
    ],
    "key3": [
        "key3 stuff K,E,Q stuff",
        "key3 stuff G,K,E,V stuff",
    ],
    "key4": [
        "key4 stuff G,E stuff",
        "key4 stuff V stuff",
    ],
}
我想对`dictionary`中每个`key`对应的`value`列表进行排序。排序规则是:必须首先根据`list1`对列表进行排序;如果没有匹配项(`None`),则必须根据`list2`对列表进行排序;如果仍然没有匹配项(`None`),则最后返回第三列中带有逗号的那一行。每个`key`仅返回第一个匹配项。这是所需的输出:
'key1': 'key1 stuff J_stuff stuff'
'key2': 'key2 stuff U_stuff stuff'
'key3': 'key3 stuff G,K,E,V stuff'
'key4': 'key4 stuff V stuff'
已知各行中的`stuff`都各不相同,如何在Python3中获得这样的结果?
编辑1:在不使用`list1`和`list2`的情况下,为每个`key`排序列表会更快吗?
编辑2 :从文件中添加实际行(见下文)
编辑3:`'key4 stuff V stuff'`在`'key4 stuff G,E stuff'`之前返回,因为如果某一行在`list2`中只匹配到单个值,则它的优先级高于匹配到多个(以逗号分隔的)值的行。
编辑4 :像上面的示例一样,添加代码以将我的文本文件的示例加载到字典中。
import sys,re
import collections
# Rows of the annotation file, grouped under the value of their first column.
dictionary = collections.defaultdict(list)

# Prefixes in priority order (plays the role of list1 in the toy example).
refseq_list = [
    "NC", "NG", "NM", "NP", "NR",
    "XM", "XP", "XR", "WP",
]

# Consequence terms in priority order (plays the role of list2).
ensembl_list = [
    "frameshift_variant",
    "missense_variant",
    "inframe_insertion",
    "inframe_deletion",
    "initiator_codon_variant",
    "stop_gained",
    "stop_lost",
    "splice_donor_variant",
    "splice_acceptor_variant",
    "splice_region_variant",
    "mature_miRNA_variant",
    "TF_binding_site_variant",
    "regulatory_region_variant",
    "TFBS_ablation",
    "TFBS_amplification",
    "regulatory_region_ablation",
    "regulatory_region_amplification",
    "coding_sequence_variant",
    "stop_retained_variant",
    "NMD_transcript_variant",
    "incomplete_terminal_codon_variant",
    "non_coding_exon_variant",
    "nc_transcript_variant",
    "5_prime_UTR_variant",
    "3_prime_UTR_variant",
    "upstream_gene_variant",
    "downstream_gene_variant",
    "intron_variant",
    "transcript_ablation",
    "transcript_amplification",
    "feature_elongation",
    "feature_truncation",
    "intergenic_variant",
    "synonymous_variant",
]
# Group the rows of the annotation file by their first tab-separated column.
# Rows starting with "#" are skipped. Blank rows are skipped as well: they
# would otherwise be stored under a meaningless "\n" key and break any later
# code that indexes into the split columns.
with open("/home/test.txt") as FileObj:
    for line in FileObj:
        if line.startswith("#") or not line.strip():
            continue
        line_split = line.split("\t")
        # The row is stored unmodified (its trailing newline is kept).
        dictionary[line_split[0]].append(line)
并将以下这些行复制到文件中:
rs141130360 chr1:16495 C ENSG00000223972 ENST00000450305 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 2825 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000223972 ENST00000456328 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 HGNC HGNC:37102 processed_transcript YES 1 - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000227232 ENST00000488147 Transcript intron_variant,non_coding_transcript_variant - - - - - rs3210724 G MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl G G - - - - - 8/10 - - ENST00000488147.1:n.1067+112C>G - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000278267 ENST00000619216 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 HGNC HGNC:50039 miRNA YES - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 653635 NC_024540.1 Transcript intron_variant,non_coding_transcript_variant - - - - - rs3210724 G MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq G G OK - - - - 8/10 - - NR_024540.1:n.1080+112C>G - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 100287102 NR_046018.2 Transcript frameshift_variant - - - - - rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000223972 ENST00000450305 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 3049 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000223972 ENST00000456328 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 HGNC HGNC:37102 processed_transcript YES 1 - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000227232 ENST00000488147 Transcript non_coding_transcript_exon_variant 955 - - - - rs62636367 T MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl T T - - - - 8/11 - - - ENST00000488147.1:n.955A>T - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000278267 ENST00000619216 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 HGNC HGNC:50039 miRNA YES - - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 653635 NR_024540.1 Transcript non_coding_transcript_exon_variant 968 - - - - rs62636367 T MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq T T OK - - - 8/11 - - - NR_024540.1:n.968A>T - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 100287102 NR_046018.2 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - - - - - - - - - RefSeq T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 102466751 NC_106918.1 Transcript frameshift_variant - - - - - rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636368 chr1:16841 T ENSG00000223972 ENST00000450305 Transcript frameshift_variant,downstream_gene_variant - - - - - rs62636368 G MODIFIER 3171 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636368 chr1:16841 T ENSG00000227232 ENST00000488147 Transcript intron_variant,frameshift_variant,non_coding_transcript_variant - - - - - rs62636368 G MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl G G - - - - - 7/10 - - ENST00000488147.1:n.908+17C>A - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs373847457 chr1:139233 A ENSG00000756734 ENST00000425211 Transcript intron_variant,non_coding_transcript_variant - -- - - rs373847457 C MODIFIER - -1 - SNV LOC100996442 EntrezGene - misc_RNA - - - - - - - - - RefSeq C C - - - - - 5/6 - - XR_001737578.2:n.1135+26651G>T - - - - - - - - - - 0.26820.3372 0.1808 0.2029 0.3592 0.3082 0.3105 0.2765 0.2229 0.3592 gnomAD_EAS - - - - - - - -
rs373847457 chr1:139233 A ENSG00000754812 ENST00000643524 Transcript intron_variant - -- - - rs373847457 C MODIFIER - -1 - SNV LOC100996442 EntrezGene - misc_RNA - - - - - - - - - RefSeq C C - - - - - 5/5 - - XR_001737579.2:n.1134-18301G>T - - - - - - - - - - 0.26820.3372 0.1808 0.2029 0.3592 0.3082 0.3105 0.2765 0.2229 0.3592 gnomAD_EAS - - - - - - - -
其中
list1 = refseq_list
list2 = ensembl_list
`key1` = `rs141130360` and `NC_046018.2` = `J_stuff` from list1 and `downstream_gene_variant` = `K` from `list2`
@ Ajax1234提供的代码在我首先提供的示例中运行良好。但是实际数据给出了一个错误:
Traceback (most recent call last):
File "readFile.py", line 71, in <module>
result = {a:find_result(b) for a, b in dictionary.items()}
File "readFile.py", line 71, in <dictcomp>
result = {a:find_result(b) for a, b in dictionary.items()}
File "readFile.py", line 64, in find_result
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
File "readFile.py", line 64, in <lambda>
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
File "readFile.py", line 64, in <listcomp>
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
ValueError: 'C' is not in list
编辑5:这是我尝试的另一种方法,已经比较接近了,但我仍然不知道如何输出最终答案:
import sys,re
import collections
# Rows of the annotation file, grouped under the value of their first column.
dictionary = collections.defaultdict(list)

# Accumulators filled by the classification pass that follows the loader.
refseq, refseq_order = [], []
ensembl, ensembl_order = [], []
ensembl_more_than_one_consequence = []

# Prefixes in priority order (plays the role of list1 in the toy example).
refseq_list = [
    "NC", "NG", "NM", "NP", "NR",
    "XM", "XP", "XR", "WP",
]

# Consequence terms in priority order (plays the role of list2).
ensembl_list = [
    "frameshift_variant",
    "missense_variant",
    "inframe_insertion",
    "inframe_deletion",
    "initiator_codon_variant",
    "stop_gained",
    "stop_lost",
    "splice_donor_variant",
    "splice_acceptor_variant",
    "splice_region_variant",
    "mature_miRNA_variant",
    "TF_binding_site_variant",
    "regulatory_region_variant",
    "TFBS_ablation",
    "TFBS_amplification",
    "regulatory_region_ablation",
    "regulatory_region_amplification",
    "coding_sequence_variant",
    "stop_retained_variant",
    "NMD_transcript_variant",
    "incomplete_terminal_codon_variant",
    "non_coding_exon_variant",
    "nc_transcript_variant",
    "5_prime_UTR_variant",
    "3_prime_UTR_variant",
    "upstream_gene_variant",
    "downstream_gene_variant",
    "intron_variant",
    "transcript_ablation",
    "transcript_amplification",
    "feature_elongation",
    "feature_truncation",
    "intergenic_variant",
    "synonymous_variant",
]

# Name -> position lookups, derived from the lists so the two cannot drift.
refseq_dict = {prefix: rank for rank, prefix in enumerate(refseq_list)}
ensembl_dict = {term: rank for rank, term in enumerate(ensembl_list)}
# Group the rows of the annotation file by their first tab-separated column.
# Rows starting with "#" are skipped. Blank rows are skipped as well: they
# would otherwise be stored under a meaningless "\n" key and make the
# column lookups further down fail with IndexError.
with open("/home/test.txt") as FileObj:
    for line in FileObj:
        if line.startswith("#") or not line.strip():
            continue
        line_split = line.split("\t")
        # The row is stored unmodified (its trailing newline is kept).
        dictionary[line_split[0]].append(line)
def get_index_refseq(s, ranks=None):
    """Return the sort rank of the prefix found in the fifth column of *s*.

    The fifth tab-separated column is cut at its first underscore and the
    leading part is looked up in *ranks*.

    Args:
        s: One tab-separated row.
        ranks: Mapping of prefix -> rank. Defaults to the module-level
            ``refseq_dict`` when omitted.

    Returns:
        The rank of the prefix. Rows that have fewer than five columns, or
        whose prefix is not in *ranks*, get a rank one past the largest
        known rank so they sort after every recognised row instead of
        raising ``IndexError``/``KeyError``.
    """
    if ranks is None:
        ranks = refseq_dict
    # Sentinel that is guaranteed to sort behind every known prefix.
    unknown = max(ranks.values(), default=-1) + 1
    by_tabs = s.split("\t")
    if len(by_tabs) < 5:
        return unknown
    by_underscore = by_tabs[4].split("_")
    return ranks.get(by_underscore[0], unknown)
def get_index_ensembl(s, ranks=None):
    """Return the sort rank of the consequence in the seventh column of *s*.

    The whole seventh tab-separated column is looked up in *ranks* as a
    single term.

    Args:
        s: One tab-separated row.
        ranks: Mapping of consequence term -> rank. Defaults to the
            module-level ``ensembl_dict`` when omitted.

    Returns:
        The rank of the term. Rows with fewer than seven columns, with a
        term that is not in *ranks*, or with a comma-separated list of
        terms get a rank one past the largest known rank. They therefore
        sort after every row carrying a single recognised term, rather than
        raising ``IndexError``/``KeyError``.
    """
    if ranks is None:
        ranks = ensembl_dict
    # Sentinel that is guaranteed to sort behind every known term.
    unknown = max(ranks.values(), default=-1) + 1
    by_tabs = s.split("\t")
    if len(by_tabs) < 7:
        return unknown
    return ranks.get(by_tabs[6], unknown)
# Classification pass: walk every stored row of every key and file it into
# one of the three module-level accumulators. Note that the accumulators are
# shared by all keys, so the result below is not grouped per key.
for rows in dictionary.values():
    for row in rows:
        fields = row.split("\t")
        # Fifth column, up to its first underscore, decides the first bucket.
        if fields[4].split("_")[0] in refseq_list:
            refseq.append(row)
            continue
        # Otherwise a seventh column holding one listed term goes here ...
        if fields[6] in ensembl_list:
            ensembl.append(row)
            continue
        # ... and a comma-separated seventh column goes to the last bucket.
        if "," in fields[6]:
            ensembl_more_than_one_consequence.append(row)

# Order the first bucket by prefix rank and print it, one row per print call.
if refseq:
    refseq_order = sorted(refseq, key=get_index_refseq)
    for ranked_row in refseq_order:
        print(ranked_row)
输出:
rs141130360 chr1:16495 C 653635 NC_024540.1 Transcript intron_variant,non_coding_transcript_variant - - -- - rs3210724 G MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq G G OK - - - -8/10 - - NR_024540.1:n.1080+112C>G - - - - - - - - - - - -- - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 102466751 NC_106918.1 Transcript frameshift_variant - - - - -rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq T T - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - -rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq G G - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - -rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq G G - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 100287102 NR_046018.2 Transcript frameshift_variant - - - - -rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - -- - - - - - - RefSeq G G - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - - -
rs62636367 chr1:16719 A 653635 NR_024540.1 Transcript non_coding_transcript_exon_variant 968 - - -- rs62636367 T MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene- - - - - - - - - RefSeq T T OK - - - 8/11 - -- NR_024540.1:n.968A>T - - - - - - - - - - - - - - -- - - - - - - - - - - - - -
rs62636367 chr1:16719 A 100287102 NR_046018.2 Transcript downstream_gene_variant - - - - -rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - -- - - - - - - RefSeq T T - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - - -
但是我不知道如何仅选择每个`key`对应的第一个`value`。
编辑6 :根据Ajax1234的请求,这是我上面的代码中的输入词典。
dictionary = {'rs141130360': ['rs141130360\tchr1:16495\tC\tENSG00000223972\tENST00000450305\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2825\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000223972\tENST00000456328\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2086\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\tprocessed_transcript\tYES\t1\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000227232\tENST00000488147\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t8/10\t-\t-\tENST00000488147.1:n.1067+112C>G\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000278267\tENST00000619216\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tHGNC\tHGNC:50039\tmiRNA\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t653635\tNC_024540.1\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tEntrezGene\tHGNC:38034\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\tOK\t-\t-\t-\t-\t8/10\t-\t-\tNR_024540.1:n.1080+112C>G\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 
'rs141130360\tchr1:16495\tC\t100287102\tNR_046018.2\tTranscript\tframeshift_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2086\t1\t-\tSNV\tDDX11L1\tEntrezGene\tHGNC:37102\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t102466751\tNG_106918.1\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t102466751\tNG_106918.1\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs62636367': ['rs62636367\tchr1:16719\tA\tENSG00000223972\tENST00000450305\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t3049\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\tENSG00000223972\tENST00000456328\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t2310\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\tprocessed_transcript\tYES\t1\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 
'rs62636367\tchr1:16719\tA\tENSG00000227232\tENST00000488147\tTranscript\tnon_coding_transcript_exon_variant\t955\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t8/11\t-\t-\t-\tENST00000488147.1:n.955A>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\tENSG00000278267\tENST00000619216\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t650\t-1\t-\tSNV\tMIR6859-1\tHGNC\tHGNC:50039\tmiRNA\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t653635\tNR_024540.1\tTranscript\tnon_coding_transcript_exon_variant\t968\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tEntrezGene\tHGNC:38034\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\tOK\t-\t-\t-\t8/11\t-\t-\t-\tNR_024540.1:n.968A>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t100287102\tNR_046018.2\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t2310\t1\t-\tSNV\tDDX11L1\tEntrezGene\tHGNC:37102\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t102466751\tNC_106918.1\tTranscript\tframeshift_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t650\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs62636368': 
['rs62636368\tchr1:16841\tT\tENSG00000223972\tENST00000450305\tTranscript\tframeshift_variant,downstream_gene_variant\t-\t-\t-\t-\t-\trs62636368\tG\tMODIFIER\t3171\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636368\tchr1:16841\tT\tENSG00000227232\tENST00000488147\tTranscript\tintron_variant,frameshift_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs62636368\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t7/10\t-\t-\tENST00000488147.1:n.908+17C>A\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs373847457': ['rs373847457\tchr1:139233\tA\tENSG00000756734\tENST00000425211\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t--\t-\t-\trs373847457\tC\tMODIFIER\t-\t-1\t-\tSNV\tLOC100996442\tEntrezGene\t-\tmisc_RNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tC\tC\t-\t-\t-\t-\t-\t5/6\t-\t-\tXR_001737578.2:n.1135+26651G>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t0.26820.3372\t0.1808\t0.2029\t0.3592\t0.3082\t0.3105\t0.2765\t0.2229\t0.3592\tgnomAD_EAS\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs373847457\tchr1:139233\tA\tENSG00000754812\tENST00000643524\tTranscript\tintron_variant\t-\t--\t-\t-\trs373847457\tC\tMODIFIER\t-\t-1\t-\tSNV\tLOC100996442\tEntrezGene\t-\tmisc_RNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tC\tC\t-\t-\t-\t-\t-\t5/5\t-\t-\tXR_001737579.2:n.1134-18301G>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t0.26820.3372\t0.1808\t0.2029\t0.3592\t0.3082\t0.3105\t0.2765\t0.2229\t0.3592\tgnomAD_EAS\t-\t-\t-\t-\t-\t-\t-\t-']}