Question

我正在尝试将一段文本拆分成文本块和基于关键词分开的标题的文本列表。我认为最好的方法是递归。不幸的是，在尝试检查给定变量*** TypeError: 'str' object is not callable的类型时，我收到以下错误。当我直接调用type(var)时，我在PDB中遇到同样的错误。这似乎没有意义，所以我担心这是我看不到的东西。

以下是我认为相关的代码部分。如果您认为需要了解更多信息，请与我们联系

def separate(text,boundary = None):
    pdb.set_trace()
    if boundary == None:
        m = re.findall(r'(?<=boundary=).*',text)
        i = 0
        textList = [text]
        while i < len(m): #have all levels of Boundary/headers named
            boundary = m[i]
            textList = recursiveSplit(textList,boundary)
            i += 1

    return textList

def recursiveSplit(chunk,boundary):
    if type(chunk) is types.ListType: #error occurs here
        for object in chunk:
            recursiveSplit(object,boundary)
    if type(chunk) is types.StringType:
        list = re.split(r'(?P<boundary>)(?!--)',chunk)
        return list
    return None

完整代码。需要文本文件。您可以使用任何MIME电子邮件。我还会上传我用于测试的电子邮件

    #Textbasics email parser
#based on a "show original" file converted into text

from sys import argv
import re, os, pdb, types

script, filename = argv
text = open(filename).read()
type = "text only" #Set the default type of email

#cut the email up by sections
#--A section is defined as any time there are two line breaks in a row
textList = re.split(r"\n\n", text)
header = textList[0]
if re.search(r'MIME-Version',header):
    type = "MIME"

# If mail has no attachments, parse as a text-only email
class Parser(object):

    def __init__(self,textList):
        a = 1
        self.body = ""
        self.textList = textList
        self.header = textList[0]
        while a < len(textList):
            self.body = self.body + textList[a] + '\n\n'
            a += 1

        m = re.search(r'(?<=Subject: ).*', self.header)
        self.subject = m.group(0)

        m = re.search(r'(?<=From: ).*', self.header)
        self.fromVar = m.group(0)

        m = re.search(r'(?<=To: ).*', self.header)
        self.toVar = m.group(0)

        m = re.search(r'(?<=Date: )\w+\s\w+\s\w+', self.header)
        self.date = m.group(0)

    def returnParsed(self,descriptor = "all"):
        if descriptor == "all":
            retv = "Subject: " + self.subject + "\n" + "From: " + self.fromVar + "\n" + "To: " + self.toVar + "\n" + "Date: " + self.date + "\n" + "\n" + self.body
            return retv

        if descriptor == "subject":
            return self.subject
        if descriptor == "fromVar":
            return self.fromVar
        if descriptor == "toVar":
            return self.toVar
        if descriptor == "date":
            return self.date
        if descriptor == "body":
            return self.body

class MIMEParser(Parser):

    class MIMEDataDecoder(object):
        def __init__(self,decodeString,type):
            pass    


    def __init__(self,textList):
        self.textList = textList
        self.nestedItems = []
        newItem = NestedItem(self)
        newItem.setContentType("Header")
        newItem.setValue(self.textList[0])
        self.nestedItems.append(newItem)
        if re.search(r'(boundary=)',newItem.value):
            helperItem = NestedItem(self)
            helperItem.value = (self.textList[0])
            m = re.search(r'(?<=Content-Type: ).+(?=;)',newItem.value)
            helperItem.setContentType(m.group(0))
            self.nestedItems.append(helperItem)

        self.organizeData()   
        """i = 0
        while i < len(self.textList):
            newItem = NestedItem(self)
            ct = self.nextContentType
            newItem.setContentType(ct)
            newItem.setValue(self.textList[i])
            self.nestedItems.append(newItem)
            m = re.search(r'(?<=Content-Type: ).+(?=;)',self.textList[i])
            if m:
                self.nextContentType = m.group(0)
            i += 1
            """

    def nestItem (self,item):
        self.nestedItems.append(item)

    def organizeData(self):
        self.nestLevel = 1
        self.currentSuper = self
        m = re.search(r'(?<=boundary=).*',self.textList[0])
        self.currentBoundary = m.group(0)
        self.currentList = self.textList
        self.currentList.remove(self.textList[0])
        self.formerObjectDatabase = {}
        pdb.set_trace()
        while self.nestLevel > 0:
            i = 0
            while i < len(self.currentList):

                boundary = self.currentBoundary
                #If block is a "normal block", containing a current boundary identifier
                p = re.search(r'--(?P<boundary>)(?!--)', text)
                if p:
                    newItem = NestedItem(self.currentSuper)
                    newItem.setValue(self.currentList[i])
                    r = re.search(r'(?<=Content-Type: ).+(?=;)',newItem.value)
                    if r:
                        newItem.setContentType(r.group(0))
                    self.currentObject = newItem
                    self.currentSuper.nestItem(self.currentObject)
                #If the block contains a new block boundary
                m = re.search(r'(?<=boundary=).*',self.currentList[i])
                if m:
                    #begin new layer of recursive commands
                    newFormerObject = self.FormerCurrentObject(self.currentList,self.currentSuper,self.currentBoundary)
                    self.formerObjectDatabase[self.nestLevel] = newFormerObject
                    self.currentSuper = self.currentObject
                    self.nestLevel += 1
                    self.currentBoundary = m.group(0)
                    boundary = self.currentBoundary
                    #self.currentList = re.split(r'--(?P<boundary>)(?!--)', self.currentList[i])
                boundary = self.currentBoundary
                #If block contains an "end of boundary" marker
                q = re.search(r'(?P<boundary>)--', text)
                if q:
                    self.nestLevel -= 1
                    currentObject = self.formerObjectDatabase[self.nestLevel]
                    self.currentList = currentObject.formerList
                    self.currentSuper = currentObject.formerSuper
                    self.currentBoundary = currentObject.formerBoundary
                i += 1                    


    class FormerCurrentObject:
        def __init__(self,formerList,formerSuper,formerBoundary):
            self.formerList = formerList
            self.formerSuper = formerSuper
            self.formerBoundary = formerBoundary




    def printAll(self):
        print "printing all: %d" % len(self.nestedItems)
        i = 0
        while i < len(self.nestedItems):
            print "printing out item %d" % i
            self.nestedItems[i].printOut()
            i += 1

class NestedItem(object):
    def __init__(self,superObject,contentType=" ",value = " "):
        self.superObject = superObject
        self.contentType = contentType
        self.value = value
        self.nestedItems = []

    def nestItem(self,item):
        self.nestedItems.append(item)

    def printOut(self,printBuffer = ""):
        print printBuffer + '++%s' % self.contentType
        print printBuffer + self.value
        a = 0
        printBuffer = printBuffer + "  "
        while a < len(self.nestedItems):
            self.nestedItems[a].printOut(printBuffer)

    def setContentType(self,contentType):
        self.contentType = contentType

    def setValue(self,value):
        self.value = value



if type == "text only":
    p = Parser(textList)
    print p.returnParsed()

def separate(text,boundary = None):
    pdb.set_trace()
    if boundary == None:
        m = re.findall(r'(?<=boundary=).*',text)
        i = 0
        textList = [text]
        while i < len(m): #have all levels of Boundary/headers named
            boundary = m[i]
            textList = recursiveSplit(textList,boundary)
            i += 1

    return textList

def recursiveSplit(chunk,boundary):
    if type(chunk) is types.ListType: #<<--error occurs here
        for obj in chunk:
            recursiveSplit(obj,boundary)
    if type(chunk) is types.StringType:
        list = re.split(r'(?P<boundary>)(?!--)',chunk)
        return list
    return None


if type == "MIME":
    #separate the text file instead by its boundary identifier
    p = MIMEParser(separate(text))
    p.printAll()

Answer 1

您正在指定要键入的字符串：

type = "text only"

然后在

中调用它

if type(chunk)...

提出异常：

*** TypeError: 'str' object is not callable

Python TypeError：调用类型函数时，'str'对象不可调用

1 个答案: