我想生成200条随机DNA序列,但不知道怎样才能一次生成200条。这是我目前写的代码:
from random import random
def randABCD(n, probA, probT, probC, probG):
    """Return a random string of ``n`` letters drawn from 'A', 'T', 'C', 'G'.

    Args:
        n: Number of characters in the returned string.
        probA: Probability of drawing 'A'.
        probT: Probability of drawing 'T'.
        probC: Probability of drawing 'C'.
        probG: Probability of drawing 'G'. It is accepted for symmetry but
            never read: 'G' is the fallback branch, so it receives whatever
            probability is left after A, T and C.

    The four probabilities are expected to sum to 1; this is not checked.

    Returns:
        A ``str`` of length ``n``.
    """
    # Cumulative upper bounds: a draw r maps to the first letter whose
    # bound is strictly greater than r.
    cA = probA
    cT = cA + probT
    cC = cT + probC

    def choose():
        """Draw a single letter according to the cumulative bounds."""
        r = random()
        if r < cA:
            return 'A'
        elif r < cT:
            return 'T'
        elif r < cC:
            return 'C'
        else:
            return 'G'

    # ``range`` rather than ``xrange``: ``xrange`` does not exist on
    # Python 3, while ``range`` works on both Python 2 and Python 3.
    return ''.join([choose() for _ in range(n)])
答案 0 :(得分:1)
下面的代码沿用了您的函数,并生成200条长度为10的序列。请注意,我把导入语句改成了 import random,并把对 random() 的调用改成了 random.random()。我还把 xrange 改成了 range,因为我是用 Python 3.x 测试这段代码的。
如果您希望输出采用其他形式,请告诉我。
import random
def randABCD(n, probA, probT, probC, probG):
    """Return a random string of ``n`` letters drawn from 'A', 'T', 'C', 'G'.

    Args:
        n: Number of characters in the returned string.
        probA: Probability of drawing 'A'.
        probT: Probability of drawing 'T'.
        probC: Probability of drawing 'C'.
        probG: Probability of drawing 'G'. It is accepted for symmetry but
            never read: 'G' is the fallback, so it receives whatever
            probability is left after A, T and C.

    The four probabilities are expected to sum to 1; this is not checked.

    Returns:
        A ``str`` of length ``n``.
    """
    # Cumulative upper bounds, in the order the letters are tested.
    cA = probA
    cT = cA + probT
    cC = cT + probC
    thresholds = (('A', cA), ('T', cT), ('C', cC))

    def choose():
        """Draw one letter: the first whose bound exceeds the random draw."""
        r = random.random()
        for base, upper in thresholds:
            if r < upper:
                return base
        # No bound exceeded r, so the draw falls in the remaining range.
        return 'G'

    return ''.join(choose() for _ in range(n))
# Demo: print a single 10-letter sequence with equal letter probabilities.
print(randABCD(10, .25, .25, .25, .25))

# Then build 200 such sequences and print them as one list.
sequences = []
for _ in range(200):
    sequences.append(randABCD(10, .25, .25, .25, .25))
print(sequences)
输出
第一次 print 调用的输出: AACCGTCTCT
来自第二次打印电话 ['CCAGTTCGGA','ACGGGAAAGT','CGTGGTAAGT','AACGATTGAG','GAGGATATGC','AGTGCCTTGT','TGACTTGCAC','GAAGGAGGCA','TCCGGTAGTT','TCTGCCGTCG','TACATAAGTC','GCTGGTTAAC',' CACCCAGGCC','CAAGAGCCAA','GCTATTCGAT','GTGCTCATCT','AAAGCAATAC','GTATGGAAAC','GTATTGGTAA','TGTAATCTTA','TCGAATACAT','TCCTCAATGG','TGTAACGGCA','TAGTCACTGT','CAAAGCTCAT' ,'GTTGAAAGTC','CTATCATGAG','CAAGCACTAT','CTGGGCTGCC','CATGTCCAGG','ACGTGTGATC','AATATGCAAC','ACTGATGGAT','TATCGCGCGA','GTAGACCCAA','CAGGATGCAT','TACGGCAGAG',' TATTTTATCA','GGTAATCACA','TAAACGTATG','CTTCCACGCG','GGCTCCAAAA','CAAGAATAAC','TCACGGTCTT','AGCGCGTCGA','TCACTATCAT','TCTGATGTCA','AGAAGGTCGT','TTAGCGTCTC','TGAGATGCGA' ,'ATACCCATGC','ACCGCTCGAG','CCATCAGGCC','AACCTTCCCG','TCACTCGGGT','CGAGACCGGA','GCAAGATGAT','TGCAATGAGG','CCAGATTGGT','GGCGATGACA','TAGTATGGTT','GCAGGTCTCG',' GGTTTTAACC','ACAACCAACT','CTGTTCAGTT','TTGGAGAGTA','AGTCGATCTG','TAATGGCAGG','CGTCCTTTAA','GCGCAACTTC','CGGTAGAATG','TCTA GCTTGC','GCGAAAGCGC','GACCCCCGGC','GCAATAGTCT','ATTGACTCCT','ACTAACGCTT','TCATAGAAGC','GTAGCTGCGT','ACAATCTCCT','TCGACTCTCT','GGCAACAGCA','TATTGTAGAC','GAGGTCAACG' ,'ATGCCAGGGA','CTCTCTTTCT','CTGCGTGATA','GCAAGAATAC','AATGCATGAC','AGTTCAGGCA','GAGATTCCCC','CCGCCGACCA','ACGACGTGCA','TGAACGCCAA','GTCGGCTATT','TGCTTATCAA',' AGGAGGCACG','TCTACTGCGA','GCCTTGACAT','GCCTCTCCCC','ACACCGACTG','ATTTAATCAT','GTGCAACGTC','GTGTGGCTAA','TGGCGATTAA','GTATGTCTCC','ACTTATGGGC','GCTACGTTTT','ATCCTCACGT' ,'GCCGGCTACA','CCCGTGAAGA','CATGACCACT','GAACCTGATG','ACGAGTGTCA','ATGTTGGTTT','CTTGGAATGA','CTTTCCTCAC','GATGCTCTTT','TAATTCTAAT','TCTGGCAAAG','CCAGGCCGCG',' TCATCGCACA','CAGCAAGATT','GCTTAGGAGG','ATATTGTGCG','AATATGACGG','TCTAGTCCCT','GTAAACCGGA','TTTAGCGTAC','CGAATAGAAC','TTAGAATCGG','CGCGCGCCTC','AATGTTAAGG','ACTCGACGCA' ,'GGTTGCTTAC','TCTGGTGCTC','TAATTAGGTA','GCCTTAGAAG','GTTTATACGC','AGCGTCCATA','GTATTGTCGA','CACCTCAGAA','CCCACCTCCG 
','AGACGCTAGA','CAAAGCCAGA','TCAAATTCAT','GGCCATTTGT','CAACATGGTA','CAAGTGTAAG','CGCCGTAACC','GGGCGGTAAT','GTCCAACCAC','CGAAGCGCAG','GGCGTCGGAG','CTTGTCGCGG', 'GCCCTTCTGC','TGCAGCCAAC','TGATTTGTTC','TGAATTCAGT','TAGTCCTGCT','GCCCTATGGG','CCAGGCTGTT','TCCTCAAAAC','CTACGGGCAT','GCCAACCGAG','CAATGGAACT','CCTTATCCTC','TAAAAGGCTA ','CGCGTGACAC','TGGCGAGCGT','CCCCGAGCAT','CAGCATTCAA','TGTACTGTCC','TCCTTGGTTA','TCAAAGATGT','CGCATACTCA','GAATCTATTT','ATCATAAGGT','ACGCTCTCGC','GTCCTCTTAA', 'ATCCGAACCT','TGGACTTCCG','TCAGATGATA','ACTTCATGCG','TCCTATACAA','ATGGTCTTTA','CTAATTCGGT','TGCACCACAT','GTGACCGTCT','ACGTCAGTCA','TCTTCCACCT','CCGAATACGC','ACTATGTCGT ','TACTATTCCC','GGGGGACGCA','TGCAGGTTCT','GGCTCTGGGG','TGGAGCGCTC','CGAGATCTTA','GGCTTGGCAT']