我有以下函数:
def sample_handling(sample, lexicon, classification):
    """Turn each line of ``sample`` into a word-count vector over ``lexicon``.

    Each of the first ``hm_lines`` lines is lower-cased, tokenized and
    lemmatized. The vector for a line has one slot per lexicon entry and is
    paired with ``classification``.

    NOTE: the inner loop calls ``current_words()`` although ``current_words``
    is a list, so the first processed line raises
    ``TypeError: 'list' object is not callable``. The line is kept as posted
    because it is the error this question is about.

    Args:
        sample: Path of the text file to read.
        lexicon: List of known words; a word's position is its vector index.
        classification: Label stored next to every vector.

    Returns:
        List of ``[features, classification]`` pairs.
    """
    featureset = []
    with open(sample, 'r') as f:
        for line in f.readlines()[:hm_lines]:
            current_words = [
                lemmatizer.lemmatize(token)
                for token in word_tokenize(line.lower())
            ]
            # One counter slot per lexicon entry.
            features = np.zeros(len(lexicon))
            for word in current_words():
                lowered = word.lower()
                if lowered in lexicon:
                    features[lexicon.index(lowered)] += 1
            featureset.append([list(features), classification])
    return featureset
当我运行代码时,它会给我以下错误:
TypeError: 'list' object is not callable
这里是否存在变量名遮蔽(shadowing)的问题?我在 SO 上看了很多讨论这个错误的帖子,但仍然无法解决我的问题。
这是我的完整代码:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import numpy as np
import random
import pickle
from collections import Counter
# Shared WordNet lemmatizer instance used by the functions below.
lemmatizer = WordNetLemmatizer()
# Maximum number of lines processed from each input file (used as a slice bound).
hm_lines = 10000000
def create_lexicon(pos, neg, min_count=50, max_count=1000):
    """Build the vocabulary (lexicon) from the positive and negative sample files.

    Every line (at most ``hm_lines`` per file) is lower-cased and tokenized,
    and the tokens are lemmatized. Only lemmas whose total frequency lies
    strictly between ``min_count`` and ``max_count`` are kept, which drops
    both rare words and very common ones.

    Args:
        pos: Path of the text file with positive samples.
        neg: Path of the text file with negative samples.
        min_count: Exclusive lower bound on a lemma's frequency.
        max_count: Exclusive upper bound on a lemma's frequency.

    Returns:
        List of the retained lemmas.
    """
    lemmas = []
    for path in (pos, neg):
        with open(path, 'r') as f:
            # Stream the file instead of loading it whole; stop after
            # hm_lines lines.
            for line_no, line in enumerate(f):
                if line_no >= hm_lines:
                    break
                lemmas.extend(
                    lemmatizer.lemmatize(token)
                    for token in word_tokenize(line.lower())
                )
    w_counts = Counter(lemmas)
    # e.g. w_counts = {'the': 52521, 'and': 25242}
    l2 = [w for w, count in w_counts.items() if min_count < count < max_count]
    print(l2)
    return l2
def sample_handling(sample, lexicon, classification):
    """Turn each line of ``sample`` into a word-count vector over ``lexicon``.

    Each of the first ``hm_lines`` lines is lower-cased, tokenized and
    lemmatized. The vector for a line has one slot per lexicon entry and is
    paired with ``classification``.

    NOTE: the inner loop calls ``current_words()`` although ``current_words``
    is a list, so the first processed line raises
    ``TypeError: 'list' object is not callable``. The line is kept as posted
    because it is the error this question is about.

    Args:
        sample: Path of the text file to read.
        lexicon: List of known words; a word's position is its vector index.
        classification: Label stored next to every vector.

    Returns:
        List of ``[features, classification]`` pairs.
    """
    featureset = []
    with open(sample, 'r') as f:
        for line in f.readlines()[:hm_lines]:
            current_words = [
                lemmatizer.lemmatize(token)
                for token in word_tokenize(line.lower())
            ]
            # One counter slot per lexicon entry.
            features = np.zeros(len(lexicon))
            for word in current_words():
                lowered = word.lower()
                if lowered in lexicon:
                    features[lexicon.index(lowered)] += 1
            featureset.append([list(features), classification])
    return featureset
def create_feature_sets_and_lables(pos, neg, test_size=0.1):
    """Build shuffled train/test feature and label lists from two sample files.

    The lexicon is created from both files, each file is converted to
    ``[features, label]`` rows (label ``[1, 0]`` for ``pos``, ``[0, 1]`` for
    ``neg``), the rows are shuffled, and the last ``test_size`` fraction is
    held out for testing.

    Args:
        pos: Path of the text file with positive samples.
        neg: Path of the text file with negative samples.
        test_size: Fraction of all rows reserved for the test split.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of lists.
    """
    lexicon = create_lexicon(pos, neg)
    # Featurize the same files the lexicon was built from, not fixed names.
    features = sample_handling(pos, lexicon, [1, 0])
    features += sample_handling(neg, lexicon, [0, 1])
    random.shuffle(features)

    testing_size = int(test_size * len(features))
    # Split on an explicit index: slicing with -testing_size misbehaves when
    # it is 0 ([:-0] is empty and [-0:] is the whole list).
    split = len(features) - testing_size
    train_rows, test_rows = features[:split], features[split:]

    # Rows pair a len(lexicon) vector with a 2-element label, so they are
    # ragged; plain list handling avoids building a NumPy array from them.
    train_x = [row[0] for row in train_rows]
    train_y = [row[1] for row in train_rows]
    test_x = [row[0] for row in test_rows]
    test_y = [row[1] for row in test_rows]
    return train_x, train_y, test_x, test_y
if __name__ == '__main__':
    # Build the data set from the two sample files and persist it as a pickle.
    train_x, train_y, test_x, test_y = create_feature_sets_and_lables(
        'pos.txt', 'neg.txt'
    )
    payload = [train_x, train_y, test_x, test_y]
    with open('sentiment_set.pickle', 'wb') as out_file:
        pickle.dump(payload, out_file)
答案 0 :(得分:0)
好吧,要开始调试,我会用下面的命令运行程序:
python -m pdb whatever_your_file_is.py
这将启动一个 pdb 调试控制台。在那里,输入 c(continue)来运行程序。运行一段时间后,如果程序崩溃,您将停在发生错误的确切位置。
从那里,您可以参考这个或这个链接(或者直接在 Google 上搜索 “python pdb”)来弄清楚代码中发生了什么。祝你好运!
答案 1 :(得分:0)
如果能贴出完整的堆栈跟踪会更有帮助。不过这是一个相对简单的错误,所以在这种情况下很容易定位问题。出问题的是这一行:
for word in current_words():
在遍历列表时,不需要(也不能)调用它,因为列表不是可调用对象。改成这样就可以了:
for word in current_words: