Question

我正在编写一段给文件打标记的代码：它查看上一行是否带有 SCI 标记，如果有，就在当前行的第五列（文件以制表符分隔）写入 SCI_NXT 标记。

然而，我遇到了 AttributeError，提示我在对一个列表调用 strip：当变量 previous_line 指向第一个非 <> 结构行时，错误出现在 previous_line = split_line(previous_line) 这一行。我明白这是因为这些行是以列表而不是字符串的形式存入结果列表的，但我不知道该如何纠正。我试过使用 “extend”，但结果是第一行的每个字符都成了一个单独的元素，这也不是我想要的。

以下是我正在研究的测试文本：

</s>
<s>
Diptera NP  Diptera-n        SCI
was VBD be-v
the DT  the-x
most    RBS most-a
common  JJ  common-j
prey    NN  prey-n
among   IN  among-i
the DT  the-x
insects NNS insect-n
potentially RB  potentially-a
available   JJ  available-j
to  IN  to-i

以下是代码：

"""Tags a file with NEXT_SCI in extra feature column. Reads and writes vert files.
"""
import json

#from pip._vendor.pyparsing import line


# Presumably the default input vert file.
# NOTE(review): no function visible in this listing references VFILE.
VFILE = 'test_next.vert'

def split_line(line):
    """Split a vert-file line into ``[word, pos, lempos, tag]``.

    The line is stripped and split on runs of whitespace.  Fields that are
    missing at the end are filled with ``None`` so the result always has
    exactly four items, whatever the number of columns on the line.

    Args:
        line: One line of the vert file, as a string.

    Returns:
        A four-item list ``[word, pos, lempos, tag]``.

    Raises:
        ValueError: If the line has more than four whitespace-separated
            fields.
    """
    # TODO: Speak to Diana about the spaces in the vert file - do they mean
    # anything?
    line = line.strip().split()
    if len(line) > 4:
        raise ValueError(
            "expected at most 4 fields, got {}: {!r}".format(len(line), line)
        )
    # Pad short lines (blank lines and two-field lines included) instead of
    # leaving the field variables unassigned.
    return line + [None] * (4 - len(line))

def tag_next_sci(lines):
    """Tag every line of a document and collect the results.

    Each line is passed to ``tagline_next_sci`` together with the list of
    results produced so far, so that function can look at the preceding
    output line.

    Args:
        lines: Iterable of lines from the original document.

    Returns:
        A list holding the value returned by ``tagline_next_sci`` for each
        input line, in input order.
    """
    taggedlines = []
    for line in lines:
        # The partially built result list is handed over on purpose: the
        # tagger reads taggedlines[-1] as the previous line.
        taggedlines.append(tagline_next_sci(line, taggedlines))
    return taggedlines


def tagline_next_sci(line, taggedlines):
    """Return ``line``, meant to be marked SCI_MOD when the previous line ends in SCI.

    Args:
        line: The current raw line (a string) from the vert file.
        taggedlines: Results already produced for the preceding lines; the
            last entry is treated as the previous line.

    Returns:
        The unchanged string for structural lines (those starting with
        ``<``).  For any other line the value comes from the branches below.

    NOTE(review): after ``line = split_line(line)`` the name ``line`` refers
    to a list, so the final ``return line`` hands a list back to the caller.
    That list ends up in ``taggedlines`` and is later passed to
    ``split_line`` as ``previous_line``, where ``.strip()`` fails with the
    AttributeError.
    """
    # <> lines are structural and need no feature tags, so they are passed
    # through unchanged.
    if line.startswith('<'):
        return line
    # Look back at the previous line to see if it is SCI; if so, tag the
    # current line.
    # NOTE(review): taggedlines[-1] raises IndexError when taggedlines is
    # empty, i.e. when the very first line is not structural.
    previous_line  = taggedlines[-1]
    previous_line = split_line(previous_line)
    line = split_line(line)
    # Look at the last column of the previous line; if it is SCI, the tag is
    # meant to be appended to the current line.
    if previous_line[-1] == "SCI":
        # NOTE(review): split_line returns a four-item list, so this branch
        # cannot be taken.
        if len(line) == 3:
            print(line + "\t\t\tSCI_MOD\n")
            return(line + "\t\t\tSCI_MOD\n")
        if len(line) == 4:
            # NOTE(review): line is a list here; list + str raises TypeError.
            print(line + "\t\tSCI_MOD\n")
            return(line + "\t\tSCI_MOD\n")
    return line

def read_vfile(fname):
    """Return all lines of the vert file ``fname`` as a list of strings.

    Line endings are kept on each returned line.
    """
    with open(fname, mode='r') as source:
        return source.readlines()

def write_vfile(fname, taggedlines):
    """Write the tagged lines to ``fname``, replacing any existing content.

    Args:
        fname: Path of the output vert file.
        taggedlines: Iterable of strings; each is written as-is, so every
            entry must carry its own line ending.
    """
    with open(fname, mode='w') as sink:
        sink.writelines(taggedlines)

def tag_vert_sci_next(fname, fname_out):
    """Read a vert file, tag its lines and write the result to a new file.

    Args:
        fname: Path of the vert file to read.
        fname_out: Path of the file the tagged lines are written to.
    """
    # read vertical file
    lines = read_vfile(fname)
    # tag every line, looking back at the previous one
    taggedlines = tag_next_sci(lines)
    # write the tagged lines out
    write_vfile(fname_out, taggedlines)

def main(fname, fname_out):
    """Run the SCI-next tagging and write the result to ``fname_out``.

    NOTE(review): ``fname`` is not used; the input path is the literal
    'test_next.vert' in the call below.
    """
    # call sci_next tagging
    tag_vert_sci_next('test_next.vert', fname_out)

# Script entry point: run the tagging only when executed directly, not on import.
if __name__ == "__main__":
    main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')

报错信息如下：

Traceback (most recent call last):
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 123, in <module>
    main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 120, in main
    tag_vert_sci_next('test_next.vert', fname_out)
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 78, in tag_vert_sci_next
    taggedlines = tag_next_sci(lines)
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 31, in tag_next_sci
    taggedlines.append(tagline_next_sci(line, taggedlines))
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 43, in tagline_next_sci
    previous_line = split_line(previous_line)
  File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 14, in split_line
    line = line.strip().split()
AttributeError: 'list' object has no attribute 'strip'

Answer 1

问题似乎在于 tagline_next_sci 有时返回的是列表而不是字符串。例如，我在函数内部加了一条 print 语句来查看发生了什么：

...
def tagline_next_sci(line, taggedlines):
    print('taggedlines', taggedlines)
    """Assigns an indicator tag to a line
    """
...

并获得输出

taggedlines []
taggedlines ['</s>\n']
taggedlines ['</s>\n', '<s>\n']
taggedlines ['</s>\n', '<s>\n', ['Diptera', 'NP', 'Diptera-n', 'SCI']]

所以你应该检查函数末尾，确保始终返回字符串；如果需要把列表拼接成字符串，可以使用 "\t".join(line)，例如：

return line if isinstance(line, str) else "\t".join(line)

Answer 2

谢谢大家的帮助。这是我最终得到的代码：

    """Tags a file with SCI_MOD in extra feature column. Reads and writes vert files.
"""
import json


# Presumably the default input vert file.
# NOTE(review): no function visible in this listing references VFILE.
VFILE = 'zenodotaggedWS_ALL.vert'

def split_line(line):
    """Split a vert-file line into ``[word, pos, lempos, tag]``.

    The line is stripped and split on runs of whitespace.  Fields that are
    missing at the end are filled with empty strings so the result always
    has exactly four items and can be re-joined with tabs.

    Args:
        line: One line of the vert file, as a string.

    Returns:
        A four-item list of strings ``[word, pos, lempos, tag]``.

    Raises:
        ValueError: If the line has more than four whitespace-separated
            fields.
    """
    fields = line.strip().split()
    if len(fields) > 4:
        raise ValueError(
            "expected at most 4 fields, got {}: {!r}".format(len(fields), fields)
        )
    # Pad short lines (blank lines and two-field lines included) instead of
    # leaving the field variables unassigned.
    return fields + [""] * (4 - len(fields))

def tag_next_sci(lines):
    """Run ``tagline_next_sci`` over every input line and collect the output.

    The list built so far is passed along with each line so the tagger can
    look back at the previously produced line.

    Args:
        lines: Iterable of lines from the original document.

    Returns:
        A list with one tagged result per input line, in input order.
    """
    tagged = []
    for current in lines:
        outcome = tagline_next_sci(current, tagged)
        tagged.append(outcome)
    return tagged


def tagline_next_sci(line, taggedlines):
    """Return the output line for ``line``, adding SCI_MOD after an SCI-n line.

    Structural lines (starting with ``<``) and blank lines are returned
    unchanged.  Any other line is split with ``split_line`` and re-joined
    with tabs; when the third column of the previous output line is
    ``SCI-n``, an extra ``SCI_MOD`` column is appended.

    Args:
        line: The current raw line (a string) from the vert file.
        taggedlines: Output lines (strings) already produced for the
            preceding input lines; may be empty.

    Returns:
        The output line as a string ending in a newline (structural and
        blank lines are returned exactly as given).
    """
    # <> lines are structural and carry no feature tags; blank lines have no
    # fields to tag.  Both are passed through untouched.
    if line.startswith('<') or not line.strip():
        return line
    # Read the previous output line directly instead of through split_line:
    # it may already carry an extra SCI_MOD column or be a short structural
    # line, neither of which split_line can parse.  An empty history simply
    # means there is no previous line to look at.
    previous_fields = taggedlines[-1].split() if taggedlines else []
    previous_lempos = previous_fields[2] if len(previous_fields) > 2 else ""
    joined = "\t".join(split_line(line))
    if previous_lempos == "SCI-n":
        tagged = joined + "\tSCI_MOD\n"
        print(tagged)
        return tagged
    return joined + "\n"

def read_vfile(fname):
    """Load the vert file ``fname`` and return its lines as a list.

    Each returned string keeps its line ending.
    """
    with open(fname, 'r') as handle:
        contents = handle.readlines()
    return contents

def write_vfile(fname, taggedlines):
    """Save the tagged lines to ``fname``, overwriting the file if it exists.

    Args:
        fname: Path of the output vert file.
        taggedlines: Iterable of strings; written verbatim, so each entry
            must already end with its own newline.
    """
    with open(fname, mode='w') as target:
        target.writelines(taggedlines)

def tag_vert_sci_next(fname, fname_out):
    """Read ``fname``, tag its lines and write them to ``fname_out``.

    Args:
        fname: Path of the vert file to read.
        fname_out: Path of the file the tagged lines are written to.
    """
    # read -> tag -> write, as one pipeline
    write_vfile(fname_out, tag_next_sci(read_vfile(fname)))

def main(fname, fname_out):
    """Tag the vert file ``fname`` and write the result to ``fname_out``.

    Args:
        fname: Path of the vert file to read.
        fname_out: Path of the tagged output file.
    """
    # Use the caller's input path rather than a hard-coded file name.
    tag_vert_sci_next(fname, fname_out)

# Script entry point: run the tagging only when executed directly, not on import.
if __name__ == "__main__":
    main('zenodotaggedWS_ALL.vert', 'zenodo_tagged_SCIMOD2.vert')

AttributeError：'list' 对象没有属性 'strip'

2 个答案: