Question

我遇到一个问题，让火车功能在python中正常工作。我无法修改def功能。我正处于需要让第二个文件一次为PosList读取一行的地方，我需要在OpenPos中匹配movieWordCount [z]的值。如果文件在那里，那么我很好用第二行（用空格分割）来填充第2列。如果不是，那么我需要else将其附加到文件末尾。这是行不通的。如果缺少值，它不会附加值，我不确定它是否会找到值。我被困在工作两天了。

以下是我正在处理的代码段：

with open("PosList") as OpenPos:
    lines = OpenPos.readlines()
    print lines
    if movieWordCount[z] in lines:
        print "found"

    #Now use tokenize to split it apart by space and set to new array for me to call column2
    else:
        print "not found"
        lines.append(movieWordCount[z] + " 1" + "\n")

这是我的完整代码：

#!/usr/bin/python

#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re

class Bayes_Classifier:

 def __init__(self, trainDirectory = "movie_reviews/"):

    #If file listing exists skip to train
    if os.path.isfile('iFileList'):
        print "file found"
        self.train()
        #self.classify()

    #If file listing does not exist skip to train
    if not os.path.isfile('iFileList'):
        print "no file"
        newfile = 'iFileList'
        tempList = set()
        subDir = './movie_reviews'
        for filenames in os.listdir(subDir):
            my_sub_path = os.path.join(os.sep,subDir,filenames)
            tempList.add(filenames)
            self.save("filenames", "try3")
        f = []
        for fFileObj in os.walk("movie_reviews/"):
            f.extend(fFileObj)
            break
        pickle.dump(f, open( "save.p", "wb" ))
        self.save(f, "try4")

        with open(newfile, 'wb') as fi:
            pickle.dump(tempList, fi)
            #print tempList

        self.train()
        #self.classify()

 def train(self):    
    '''Trains the Naive Bayes Sentiment Classifier.'''
    print "File ready for training"
    #Open iFileList to use as input for opening movie files
    x = 0
    OpenIFileList = open('iFileList','r')
    print "iFileList now Open"
    #Loop through the file
    for line in OpenIFileList:
        #print "Ready to read lines"
        #print "reading line " + line
        if x > 4:
            if x % 2 == 0:
                #print line
                s = line
                if '-' in s:
                    comp = s.split("'")
                    #print comp[2]
                    print comp[1] #This is What you need for t he movie file
                    compValue1 = comp[1]
                    #Determine Positive/Negative.
                    #compType is the variable I am storing it to.
                    compType = compValue1.split("-",2)[1]
                    #print compType  #Prints that middle value like 5 or 1
                    #  This will do the work based on the value.
                    if compType == '5':
                    #    print "you have a five"  #Confirms the loop I am in.
                        #If file does not exists create it
                        if not os.path.exists('PosList'):
                            print "no file"
                            file('PosList', 'w').close()
                        #Open file that needs to be reviewed for word count
                        compValue2 = "movie_reviews/" + compValue1
                        print compValue2  #Prints the directory and file path
                        OpenMovieList = open(compValue2,'r')
                        for commentLine in OpenMovieList:
                            commentPositive = commentLine.split(" ")
                            commentPositiveCounter = Counter(commentPositive)
                            #print commentPositiveCounter # " Comment Pos goes here"
                            #if commentLine != '' or commentLine != ' ':
                            #Get first word, second word, ....
                            if commentLine and (not commentLine.isspace()):
                                movieWordCount = self.tokenize(commentLine)
                                y = len(movieWordCount) #determines length of string
                                print y
                                z = 0
                                #print movieWordCount[0]  # Shows the zero position in the file.
                                while z < y:
                                    print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id

                                    with open("PosList") as OpenPos:
                                        lines = OpenPos.readlines()
                                        print lines
                                        if movieWordCount[z] in lines:
                                            print "found"
                                        else:
                                            print "not found"
                                            lines.append(movieWordCount)


                                    z = z + 1

                        #Close the files
                        OpenMovieList.close()
                        OpenPos.close()


        x += 1
        #for line2 in OpenIFileList.readlines():
        #for line in open('myfile','r').readlines():
            #do_something(line)

    #Save results
    #Close the File List
    OpenIFileList.close()



 def loadFile(self, sFilename):
    '''Given a file name, return the contents of the file as a string.'''

    f = open(sFilename, "r")
    sTxt = f.read()
    f.close()
    return sTxt

 def save(self, dObj, sFilename):
    '''Given an object and a file name, write the object to the file using pickle.'''

    f = open(sFilename, "w")
    p = pickle.Pickler(f)
    p.dump(dObj)
    f.close()

 def load(self, sFilename):
    '''Given a file name, load and return the object stored in the file.'''

    f = open(sFilename, "r")
    u = pickle.Unpickler(f)
    dObj = u.load()
    f.close()
    return dObj

 def tokenize(self, sText): 
    '''Given a string of text sText, returns a list of the individual tokens that 
    occur in that string (in order).'''

    lTokens = []
    sToken = ""
    for c in sText:
        if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
            sToken += c
        else:
            if sToken != "":
                lTokens.append(sToken)
                sToken = ""
            if c.strip() != "":
                lTokens.append(str(c.strip()))

    if sToken != "":
        lTokens.append(sToken)

    return lTokens

Answer 1

要打开文件进行书写，可以使用

with open('PosList', 'w') as Open_Pos

当您使用with表单时，您不需要关闭该文件; Python会在with-block的末尾为你做这件事。

因此，假设您向lines变量添加数据的方式是正确的，您可以删除多余的代码OpenMovieList.close()和OpenPos.close()，并在代码中附加2行：

with open("PosList") as OpenPos:
    lines = OpenPos.readlines()
    print lines
    if movieWordCount[z] in lines:
        print "found"
    else:
        print "not found"
        lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
    OpenPos.write(lines)

用于打开数据并将数据保存到文件的Python编码

1 个答案: