对于这个烦人的问题感到抱歉,我知道有几个类似问题的主题,但我找不到任何一个解决方案。我试图运行我的代码,我收到一个错误“权限被拒绝”。有人说这是一个Windows问题而不是Python问题,但我仍无法找到解决方案。你能帮帮我吗?非常感谢。
#/usr/share/bin/python2
import re, os, sys
error_to_catch = getattr(__builtins__,'FileNotFoundError', IOError)
def splitFiles(inputPos, inputNeg, directory):
print directory
dirPos = directory + "/positive"
dirNeg = directory + "/negative"
os.mkdir(dirPos)
os.mkdir(dirNeg)
for i in range(len(inputPos)):
outputNeg = open(os.path.join(dirNeg, str(i+1) + ".txt"), "w")
outputPos = open(os.path.join(dirPos, str(i+1) + ".txt"), "w")
outputNeg.write(inputNeg[i])
outputPos.write(inputPos[i])
outputNeg.close()
outputPos.close()
def getUnigrams(sentence):
x = sentence.replace("'", "")
y = x.replace("--", "")
z = y.replace("_", " ")
wordList = re.findall("[\w']+", z)
return wordList
def arffOutput(inputPos, inputNeg, directory):
print directory
""" Creates an ARFF file from the given set of sentences.
First, split each sentences into unigrams, remove apostrophies as it's
not supported by the ARFF standard.
Then, create a joint list of tokens that are both in positive and
negative sets, and remove them from each sets.
We are thus left with lists that have tokens only in positive, only
in negative, or both.
At last, create the structure of the arff file and put each token in.
@param sentences: a list of strings.
"""
output = open(directory + "_tokens.arff", "w")
output.write("@RELATION sentiments\n")
wordListPos = []
wordListNeg = []
unigrams = []
for sentence in inputPos:
wordList = getUnigrams(sentence)
for word in wordList:
wordListPos.append(word)
unigrams.append(word)
for sentence in inputNeg:
wordList = getUnigrams(sentence)
for word in wordList:
wordListNeg.append(word)
unigrams.append(word)
wordListPos = list(set(wordListPos))
wordListNeg = list(set(wordListNeg))
unigrams = list(set(unigrams))
for word in unigrams:
output.write("@ATTRIBUTE " + str(word) + " {yes,no}\n")
output.write("@ATTRIBUTE CLASS {positive,negative}\n@data\n")
for sentence in inputPos:
outputString = ""
wordsInSentence = getUnigrams(sentence)
for word in unigrams:
if word in wordsInSentence:
outputString += "yes,"
else:
outputString += "no,"
outputString += "positive\n"
output.write(outputString)
for sentence in inputNeg:
outputString = ""
wordsInSentence = getUnigrams(sentence)
for word in unigrams:
if word in wordsInSentence:
outputString += "yes,"
else:
outputString += "no,"
outputString += "negative\n"
output.write(outputString)
output.close()
if __name__ == "__main__":
argv = sys.argv
if(len(argv) > 1):
for i in range(1, len(argv)):
fileInput = open(argv[i], "r")
f = fileInput.read()
fileInput.close()
f = f.replace("_", " ")
f = f.replace("--", " ") #in original/MR-neg.txt
charCount = len(set(f))
wordList = re.findall("[\w']+", f)
wordCount = len(wordList)
uniqueWordCount = len(set(wordList))
print "chars\twords\tunique word"
print charCount , "\t" , wordCount , "\t" , uniqueWordCount,
"\t", argv[i]
else:
directories = ["original","remove_stop_word",
"stemming_and_remove_stop_word"]
for directory in directories:
filePos =open('C:/Users/Kosio/Desktop/DataMining_B/Data/data', 'r')
fileNeg =open('C:/Users/Kosio/Desktop/DataMining_B/Data/data', 'r')
sentencesPos = filePos.readlines()
sentencesNeg = fileNeg.readlines()
filePos.close()
fileNeg.close()
splitFiles(sentencesPos, sentencesNeg, directory)
arffOutput(sentencesPos, sentencesNeg, directory)
目录中的目录: filePos = open('C:/ Users / Kosio / Desktop / DataMining_B / Data / data','r') fileNeg = open('C:/ Users / Kosio / Desktop / DataMining_B / Data / data','r')
错误行是重要的