Question

是的，所以我下载了一个Python程序来解码DNA序列。它适用于Python 2.7，但无论我做什么，pip都会将其安装到Python 3.6中。所以我添加了打印功能的括号，它变得可用。下面的命令行代码：

kevin@Kevin-Jia:~$ sudo pip install file2dna
Collecting file2dna
  Using cached file2dna-0.4.tar.gz
Installing collected packages: file2dna
  Running setup.py install for file2dna ... done
Successfully installed file2dna-0.4
kevin@Kevin-Jia:~$ sudo nano /usr/lib/python3.6/site-
packages/dna/dna.py
kevin@Kevin-Jia:~$ dna
usage: dna [-h] [-e | -s | -d | -j] file
dna: error:  the following arguments are required: file
kevin@Kevin-Jia:~$ dna -d code.dna
Traceback (most recent call last):
  File "/usr/bin/dna", line 11, in <module>
    load_entry_point('file2dna==0.4', 'console_scripts', 'dna')()
  File "/usr/lib/python3.6/site-packages/dna/dna.py", line 379, in 
  main
    dna.decode(args.file)
  File "/usr/lib/python3.6/site-packages/dna/dna.py", line 49, in 
  decode
    s4 = self.__S5_to_S4(s5)
  File "/usr/lib/python3.6/site-packages/dna/dna.py", line 294, in 
  __S5_to_S4   
    s4 = dna_inv_table[s5[i]][s5[i-1]] + s4
KeyError: '\n'
kevin@Kevin-Jia:~$

代码如下：

#!/usr/bin/python

import binascii
import csv
from random import randint
import os
import math
import zipfile
import sys
import argparse

class DNA(object):
    """
    This class implements four main methods:
        encode(file): encode an arbitrary file in DNA form as described
                    in Nature's article.
        encode_split(file): encode and prepare file for sequencing.
        decode(file): decode file from DNA.
        decode_join(file): decode splitted file.
    """

    # Huffman code dictionary
    code = {}

    # Dictionary with files 2-trits
    files_trits = {}

    def __init__(self):
        """ Populates the Huffman code dictionary """

        # Read the dictionary from data/huff3.dict
        # I made this file from the View_huff3.cd.new.correct
        huff_dict = open(os.path.join(os.path.dirname(__file__), "data/huff3.dict"), "r")
        csv_reader = csv.reader(huff_dict, delimiter=',')
        for row in csv_reader:
            self.code[row[0]] = row[1]

    def encode(self, input_file):
        """ Encode input_file in an DNA string """

        s1 = self.__S0_to_S1(input_file)
        s4 = self.__S1_to_S4(s1)
        s5 = self.__S4_to_S5(s4)
        open(input_file + '.dna', 'w').write(s5)

    def decode(self, input_file):
        """ Decode file in an DNA string """
        s5 = open(input_file, 'r').read()
        s4 = self.__S5_to_S4(s5)
        s0 = self.__S4_to_S0(s4)
        # Save s0 after conversion from hexadecimal to bytes
        open(input_file[:-4]+'.decoded', 'wb').write(binascii.unhexlify(s0))

    def decode_join(self, input_file):
        """ Decode and join DNA zip file into DNA string """
        Findex = self.__Files_to_Findex(input_file)
        Fi = self.__Findex_to_Fi(Findex)
        s5 = self.__Fi_to_S5(Fi)
        s4 = self.__S5_to_S4(s5)
        s0 = self.__S4_to_S0(s4)
        # Save s0 after conversion from hexadecimal to bytes
        open(input_file[:-13]+'.decoded', 'wb').write(binascii.unhexlify(s0))

    def encode_split(self, input_file):
        """ Encode file in many overlapping DNA string. Returns a zip file """

        s1 = self.__S0_to_S1(input_file)
        s4 = self.__S1_to_S4(s1)
        s5 = self.__S4_to_S5(s4)
        F = self.__S5_to_Fi(s5)
        Findex = self.__Fi_to_Findex(F, input_file)
        self.__Findex_to_Files(Findex, input_file)
        return Findex

    def __S0_to_S1(self, input_file):

        # Update 2-trits file dictionary (for later use: if more than one file)
        trit = self.__base10_to_base3(len(self.files_trits))
        trit = '0'  * (2 - len(trit)) + trit
        self.files_trits[input_file] = trit
        # Concatenate byte by byte to s1 (after Huffman codification)
        s1 = ""
        with open(input_file, 'rb') as f:
            byte = f.read(1)
            while byte:
                s1 = s1 + self.code[str(int(binascii.hexlify(byte), 16))]
                byte = f.read(1)
        return s1

    def __S1_to_S4(self, s1):

        # Compute s2
        n = len(s1)
        s2 = self.__base10_to_base3(n)
        s2 = '0' * (20 - len(s2)) + s2

        # Compute s3
        s3 = '0' * (-(n + len(s2) % 25) % 25)

        # Compute s4
        s4 = s1 + s3 + s2
        return s4

    def __S4_to_S5(self, s4):

        # Create table
        dna_table = {
            'A': ['C', 'G', 'T'],
            'C': ['G', 'T', 'A'],
            'G': ['T', 'A', 'C'],
            'T': ['A', 'C', 'G']
        }

        s5 = ''
        s5 = s5 + dna_table['A'][int(s4[0])]
        for c in s4[1:]:
            s5 = s5 + dna_table[s5[-1]][int(c)]

        return s5

    def __S5_to_Fi(self, s5):
        n = len(s5)
        F = []
        for i in range(0, n/25 - 3):
            # Reverse if odd
            if i % 2 == 1:
                F.append(self.__reverse_complement(s5[25*i:25*i+100]))
            else:
                F.append(s5[25*i:25*i+100])
        return F

    def __Fi_to_Findex(self, F, input_file):

        # Indexed segments
        Findex = []

        # File ID
        ID = self.files_trits[input_file]

        # Create table
        dna_table = {
            'A': ['C', 'G', 'T'],
            'C': ['G', 'T', 'A'],
            'G': ['T', 'A', 'C'],
            'T': ['A', 'C', 'G']
        }

        # For each segment compute the IX
        for i in range(0, len(F)):
            i3 = self.__base10_to_base3(i)
            i3 = '0' * (12 - len(i3)) + i3
            P = (int(ID[1-1]) + int(i3[1-1]) + int(i3[3-1]) +
                int(i3[5-1]) + int(i3[7-1]) + int(i3[9-1]) + int(i3[11-1])) % 3
            IX = ID + i3 + str(P)

            ix = ''
            ix = ix + dna_table[F[i][-1]][int(IX[0])]
            for c in IX[1:]:
                ix = ix + dna_table[ix[-1]][int(c)]

            if F[i][0] == 'A':
                F[i] = 'T' + F[i]
            elif F[i][0] == 'T':
                F[i] = 'A' + F[i]
            else:
                if randint(0,1) == 0:
                    F[i] = 'A' + F[i]
                else:
                    F[i] = 'T' + F[i]

            if ix[-1] == 'C':
                ix = ix + 'G'
            elif ix[-1] == 'G':
                ix = ix + 'C'
            else:
                if randint(0,1) == 0:
                    ix = ix + 'G'
                else:
                    ix = ix + 'C'

            Findex.append(F[i] + ix)

        return Findex

    def __Findex_to_Files(self, Findex, input_file):
        temp_dir = input_file +'.splitted'
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        zf = zipfile.ZipFile(temp_dir +'.zip', 'w', zipfile.ZIP_DEFLATED)

        # Necessary number of leading zeros
        n0 = int(math.log(len(Findex), 10) + 1)

        i = 0
        for f in Findex:
            fragment_file = "{0}.{1:0{2}d}".format(os.path.basename(input_file), i, n0) 
            open(fragment_file, 'w').write(f)
            zf.write(fragment_file)
            os.remove(fragment_file)
            i = i + 1
        os.rmdir(temp_dir)

    def __Files_to_Findex(self, input_file):

        # Remove the .zip
        temp_dir = input_file[:-4]

        # Extract files to temp_dir
        zipfile.ZipFile(input_file, 'r').extractall(temp_dir)

        Findex = []
        for f in os.listdir(temp_dir):
            fragment_file = temp_dir + os.sep + f
            Findex.append(open(fragment_file, 'r').read())
            os.remove(fragment_file)

        os.rmdir(temp_dir)

        return Findex

    def __Findex_to_Fi(self, Findex):

        # Create inverse table
        dna_inv_table = {
            'A': {'T': '0', 'G': '1', 'C': '2'},
            'C': {'A': '0', 'T': '1', 'G': '2'},
            'G': {'C': '0', 'A': '1', 'T': '2'},
            'T': {'G': '0', 'C': '1', 'A': '2'}
        }

        F = [''] * len(Findex)
        for Fi in Findex:

            # Check if reverse complemented
            if Fi[0] != 'T' and Fi[0] != 'A':
                Fi = self.__reverse_complement(Fi)

            # Remove prepended A/T and appended C/G
            Fi = Fi[1:116] # Prior lenght of Fi is 117

            # Extract ix (last 15) n DNA format
            ix = Fi[-15:]
            Fi = Fi[:-15]

            # Convert ix to trits (IX)
            lastFi = Fi[-1]
            IX = dna_inv_table[ix[0]][lastFi]
            for i in range(1, 15):
                IX = IX + dna_inv_table[ix[i]][ix[i-1]]

            # Extract ID
            ID = IX[:2]

            # Extract i3 and i
            i3 = IX[2:len(IX)-1]
            i = self.__base3_to_base10(i3)

            # Checksum error
            P = int(IX[-1])
            Pexpected = (int(ID[1-1]) + int(i3[1-1]) + int(i3[3-1]) + 
                        int(i3[5-1]) + int(i3[7-1]) + int(i3[9-1]) + int(i3[11-1])) % 3
            if P != Pexpected:
                print("Corrupted segment:\nID = %s\ni = %d") %(ID, i)
            else:
                # Save Fi
                if i % 2 == 1:
                    F[i] = self.__reverse_complement(Fi)
                else:
                    F[i] = Fi
        return F

    def __Fi_to_S5(self, Fi):

        # In real applications we should check if the overlapping
        # parts are equal. I won't do that for now

        s5 = Fi[0][0:75]
        for f in Fi:
            s5 = s5 + f[-25:]
        return s5

    def __S5_to_S4(self, s5):

        # Create inverse table
        dna_inv_table = {
            'A': {'T': '0', 'G': '1', 'C': '2'},
            'C': {'A': '0', 'T': '1', 'G': '2'},
            'G': {'C': '0', 'A': '1', 'T': '2'},
            'T': {'G': '0', 'C': '1', 'A': '2'}
        }

        s4 = ''
        for i in range(len(s5) -1, 0,-1):
            s4 = dna_inv_table[s5[i]][s5[i-1]] + s4
        s4 = dna_inv_table[s5[0]]['A'] + s4

        return s4

    def __S4_to_S0(self, s4):

        # s2 is the last 20 trits
        s2 = s4[-20:]

        # n = len(s1) is s2 converted to base 10 
        n = self.__base3_to_base10(s2)

        # s1 is the first n trits
        s1 = s4[:n]

        # Time to convert s1 from trits to bytes in s0
        inverted_code = dict([v,k] for k,v in self.code.items())
        s0 = ''
        i = 0
        while i != n:
            if s1[i:i+5] in inverted_code.keys():
                s0 = s0 + ''.join('%02x' % int(inverted_code[s1[i:i+5]]))
                i = i + 5
            else:
                s0 = s0 + ''.join('%02x' % int(inverted_code[s1[i:i+6]]))
                i = i + 6

        return s0

    def __base10_to_base3(self,n):
        """ Input: int and Output: str """
        if n == 0:
            return '0'

        s = ''
        while n != 0:
            s =  str((n % 3)) + s
            n = n/3
        return s

    def __base3_to_base10(self,n):
        """ Input: str and Output: int """
        n = int(n)
        if n == 0:
            return 0

        res = 0
        b = 1
        while n != 0:
            res = res + (n % 10) * b
            n = n/10
            b = 3 * b
        return res
        # return int(n , 3) <= Hahaha

    def __reverse_complement(self,s):
        reverse = ''

        complement_dict = {'A': 'T', 'T': 'A', 'C':'G', 'G': 'C'}

        for c in s:
            reverse = complement_dict[c] + reverse

        return  reverse

def main():
    parser = argparse.ArgumentParser(prog='dna')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-e', help='encode file and save it as .dna', action="store_true")
    group.add_argument('-s', help='encode file and save it as .splitted.zip', action="store_true")
    group.add_argument('-d', help='decode .dna file and save as .decoded', action="store_true")
    group.add_argument('-j', help='decode .splitted.zip file and save it as .decoded', action="store_true")
    parser.add_argument('file', type=str, help='File to be encoded/decoded.')
    args = parser.parse_args()

    dna = DNA()
    if args.e:
        dna.encode(args.file)
    elif args.s:
        dna.encode_split(args.file)
    elif args.d:
        if args.file[-4:] != '.dna':
            print("I only decode files terminated in .dna!")
            sys.exit(1)
        dna.decode(args.file)
    elif args.j:
        if args.file[-13:] != '.splitted.zip':
            print("I only decode and join files terminated in .splitted.zip!")
            sys.exit(1)
        dna.decode_join(args.file)
    else:
        parser.print_help()

if __name__ == '__main__':
    main()

所以有人知道如何解决这个错误 - 我已经尝试了一切吗？

Answer 1

您的文件＆＃34; code.dna＆＃34;不符合python脚本期望的语法。该文件包含换行符（＆＃34; \ n＆＃34;），脚本的程序员没有考虑。它可能是文件＆＃34; code.dna＆＃34;使用Windows样式的换行符，而脚本现在在Linux上运行？但这只是猜测。数据文件可能与脚本期望的完全不同。

Python - KeyError：'\ n'

1 个答案: