我想把文件中的双重序列化 JSON 对象读入 pandas DataFrame。
JSON 的示例如下:
{"input":"8\t140630920\t.\tC\tT\t840.948\t.","assembly_name":"GRCh37","end":140630920,"seq_region_name":"8","transcript_consequences":[{"source":"Ensembl","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"ENST00000520439","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"RefSeq","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria 
provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"NM_016601.2","cds_start":706,"gene_id":51305,"protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"RefSeq","variant_allele":"T","cdna_end":764,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":764,"transcript_id":"XM_005250954.1","cds_start":706,"gene_id":51305,"protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"Ensembl","variant_allele":"T","cdna_end":755,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM 
SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":755,"transcript_id":"ENST00000522317","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant","NMD_transcript_variant"],"impact":"MODERATE"},{"source":"Ensembl","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism 
syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"ENST00000303015","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"gene_id":"ENSG00000169427","source":"Ensembl","distance":1672,"variant_allele":"T","phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"consequence_terms":["upstream_gene_variant"],"strand":-1,"transcript_id":"ENST00000523477","impact":"MODIFIER"},{"gene_id":"ENSG00000169427","source":"Ensembl","distance":2630,"variant_allele":"T","phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL 
SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"consequence_terms":["upstream_gene_variant"],"strand":-1,"transcript_id":"ENST00000519923","impact":"MODIFIER"}],"strand":1,"id":"8_140630920_C/T","allele_string":"C/T","most_severe_consequence":"missense_variant","start":140630920}
使用 dft = pd.read_json(filename, lines=True) 将此内容读入 pandas 数据框,
结果如下所示(图:Table generated by pandas read-json)。
但是,我想从 ['transcript_consequences'] 列中提取信息,
也想从 ['transcript_consequences'] 列内嵌套的 ['phenotypes'] 中提取信息。
如何在pandas数据框中实现这一目标?
答案 0 :(得分:0)
一种可行的做法如下:
>>> import pandas as pd
>>> jsona = pd.read_json('jsona.json') #here the file is named 'jsona.json'
>>> transcript_consequences = jsona['transcript_consequences'].apply(pd.Series)
我不确定这是否是有意为之,但 transcript_consequences[0] 似乎与 transcript_consequences[6] 相同。
您可以执行以下操作:
>>> phenotypes0 = pd.DataFrame(transcript_consequences.phenotypes[0]) #and so on
>>> isinstance(phenotypes0,pd.DataFrame)
True
>>> isinstance(transcript_consequences,pd.DataFrame)
True
>>> isinstance(jsona,pd.DataFrame)
True
#in order to get one dataframe, concatenate and pass the dataframes in a list, like so:
>>> pd.concat([transcript_consequences, phenotypes0], axis=1) #with more elements (more phenotypes) add them to the list