# Count how many fusion records each gene takes part in, on either side of
# the '&' separator, then print one "gene<TAB>count" row per gene.
frequencies = {}
with open('GBM_Fusion_List.txt') as fileIn:
    # Iterating the file object yields one line at a time until EOF.
    for line in fileIn:
        # Strip the line ending so 'GeneB\n' and 'GeneB' map to the same key.
        line = line.strip()
        if not line:
            # Blank lines carry no record.
            continue
        # A record looks like 'GeneA&GeneB'. Only the first two fields are
        # used; a record without '&' raises IndexError on fields[1].
        fields = line.split('&')
        # Both partners are counted independently. Chaining the two updates
        # in a single if/elif ladder would never reach the second gene.
        for gene in (fields[0], fields[1]):
            frequencies[gene] = frequencies.get(gene, 0) + 1

# Keys come out in first-seen order (dict insertion order).
for key, value in frequencies.items():
    print('{}\t{}'.format(key, value))
这是我一直在努力解决的问题。我是 Python 新手,刚开始一份新的实习。
任务:编写一个 Python 脚本,计算融合基因列表中每个伙伴基因的出现频率和百分比。输入文件每行一个融合基因,格式如下:
GeneA&GeneB
GeneA&GeneC
GeneD&GeneF
.
.
输出应包括每个基因的名称和出现频率,以及它的每个伙伴基因的名称、频率和所占百分比。
示例输出:
GeneA 2 GeneB 1 50% GeneC 1 50%
GeneD 1 GeneF 1 100%
答案 0 :(得分:1)
这可能会有所帮助,我已尝试使其易于理解
# For every gene on the left of '&', tally how many fusion records it starts
# and how often each right-hand partner accompanies it, then print one line
# per left-hand gene: "<gene> <count> <partner> <n> <pct>% ...".
frequencies = {}
with open('GBM_Fusion_List.txt') as file:
    # Whitespace split: one 'GeneA&GeneB' token per entry; blank lines vanish.
    rows = file.read().split()

for fusion_gene in rows:
    # Unpacking raises ValueError for a token that is not exactly 'X&Y'.
    first_gene, second_gene = fusion_gene.split('&')
    # Create the per-gene record on first sight, then update it in place.
    entry = frequencies.setdefault(
        first_gene, {'count': 0, 'partner_genes': {}}
    )
    entry['count'] += 1
    partner_genes = entry['partner_genes']
    partner_genes[second_gene] = partner_genes.get(second_gene, 0) + 1

for first_gene, entry in frequencies.items():
    print('{first_gene} {first_gene_count}'.format(
        first_gene=first_gene, first_gene_count=entry['count']), end='')
    partner_genes = entry['partner_genes']
    # Denominator for the percentages: total partner occurrences of this gene.
    partner_total = sum(partner_genes.values())
    for gene, freq in partner_genes.items():
        percentage = (freq / partner_total) * 100
        # 'g' drops the trailing '.0' so whole percentages print as '50%',
        # and limits other values to six significant digits.
        print(' {gene} {freq} {percent:g}%'.format(
            gene=gene, freq=freq, percent=percentage), end='')
    # Terminate the line for this gene.
    print()