请不要将此问题标记为'重复'。我已经尝试了所有其他解决方案,但我仍然收到此错误。
朋友们,我正在使用PyCharm中的django-python开发一个短信垃圾邮件检查Web应用程序。我收到了'属性错误'(AttributeError)。当我在终端上运行这个程序时它工作正常,但是当我在PyCharm中运行时就会收到这个错误。有人能解答这个问题吗?
SmsSpamCheck.py文件
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import wordpunct_tokenize
import cPickle
import csv
import os
import pandas as pd
from django.shortcuts import render
from sklearn.cross_validation import StratifiedKFold, train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from textblob import TextBlob
from spamchecker.models import TextEntry
# Make the relative 'nltk_data' directory searchable for NLTK corpora.
# NOTE(review): a relative path resolves against the process working
# directory — presumably correct from the terminal; verify under Django/PyCharm.
#STOP = set(stopwords.words('english'))
nltk.data.path.append('nltk_data')
# NOTE(review): this rebinds the name 'stopwords' from the imported module to
# a plain set of English stopwords; the module itself is unreachable below.
stopwords = set(stopwords.words('english'))
# Full SMS corpus, loaded eagerly at import time from a hard-coded absolute
# path: tab-separated, two columns named "label" and "message".
SPAMMESSAGES = pd.read_csv('/home/gaurav/PycharmProjects/spamchecker/spamchecker/SMSSpamCollection', sep='\t',quoting=csv.QUOTE_NONE, names=["label", "message"])
# Preprocessing
def split_tokens(message):
    """Tokenize *message* into a list of TextBlob ``Word`` tokens.

    Bug fix: ``unicode(message, 'utf8')`` raises ``TypeError: decoding
    Unicode is not supported`` when *message* is already a unicode string,
    which is exactly what a Django form field delivers. Decode only when
    the input is a byte string (works on both Python 2 and 3).
    """
    if isinstance(message, bytes):
        message = message.decode('utf8')
    return TextBlob(message).words
def split_lemmas(message):
    """Lowercase *message* and return the lemma of each token.

    Bug fix: ``unicode(message, 'utf8')`` raises ``TypeError`` on input
    that is already unicode (e.g. text from a Django form); decode only
    byte strings (compatible with both Python 2 and 3).
    """
    if isinstance(message, bytes):
        message = message.decode('utf8')
    words = TextBlob(message.lower()).words
    return [word.lemma for word in words]
def split_stopwords(message):
    """Lowercase *message*, drop English stopwords, return token lemmas.

    Reads the module-level ``stopwords`` set. Note the filter tests the
    surface token (not its lemma) against the stopword set, matching the
    original behaviour.

    Bug fix: ``unicode(message, 'utf8')`` raises ``TypeError`` on input
    that is already unicode (e.g. text from a Django form); decode only
    byte strings (compatible with both Python 2 and 3).
    """
    if isinstance(message, bytes):
        message = message.decode('utf8')
    words = TextBlob(message.lower()).words
    return [word.lemma for word in words if word not in stopwords]
# Training
def train_multinomial_nb(messages):
    """Train, evaluate and persist a Multinomial Naive Bayes spam classifier.

    :param messages: DataFrame with 'message' (text) and 'label' columns.
    Side effects: prints a confusion matrix and classification report for a
    20% hold-out split, and pickles the fitted GridSearchCV to disk.

    Fix: Python-2-only ``print`` statements replaced with single-argument
    ``print(...)`` calls, which behave identically under Python 2's print
    statement and are valid Python 3.
    """
    # Hold out 20% of the data for the final evaluation report.
    msg_train, msg_test, label_train, label_test = train_test_split(
        messages['message'], messages['label'], test_size=0.2)
    # bag-of-words -> tf-idf -> Multinomial Naive Bayes
    pipeline = Pipeline([('bow', CountVectorizer(analyzer=split_stopwords)),
                         ('tfidf', TfidfTransformer()),
                         ('classifier', MultinomialNB())])
    # Hyper-parameter space explored by the grid search.
    params = {
        'tfidf__use_idf': (True, False),
        'bow__analyzer': (split_lemmas, split_tokens, split_stopwords),
    }
    grid = GridSearchCV(
        pipeline,
        params,             # parameters to tune via cross validation
        refit=True,         # refit on all data with the best found settings
        n_jobs=-1,
        scoring='accuracy',
        # Pre-0.18 scikit-learn API: labels passed to the splitter directly.
        cv=StratifiedKFold(label_train, n_folds=5),
    )
    nb_detector = grid.fit(msg_train, label_train)
    print("")
    predictions = nb_detector.predict(msg_test)
    print(":: Confusion Matrix")
    print("")
    print(confusion_matrix(label_test, predictions))
    print("")
    print(":: Classification Report")
    print("")
    print(classification_report(label_test, predictions))
    # NOTE(review): pickle stores the analyzer functions by module path, so
    # the model can only be unpickled where this module is importable under
    # the SAME module name — otherwise loading raises AttributeError
    # ("'module' object has no attribute 'split_stopwords'").
    file_name = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl'
    with open(file_name, 'wb') as fout:
        cPickle.dump(nb_detector, fout)
    print('model written to: ' + file_name)
def train_svm(messages):
    """Train, evaluate and persist an SVM spam classifier.

    :param messages: DataFrame with 'message' (text) and 'label' columns.
    Side effects: prints a confusion matrix and classification report for a
    20% hold-out split, and pickles the fitted GridSearchCV to disk.

    Fix: Python-2-only ``print`` statements replaced with single-argument
    ``print(...)`` calls, which behave identically under Python 2's print
    statement and are valid Python 3.
    """
    # Hold out 20% of the data for the final evaluation report.
    msg_train, msg_test, label_train, label_test = train_test_split(
        messages['message'], messages['label'], test_size=0.2)
    # bag-of-words -> tf-idf -> SVC
    pipeline = Pipeline([('bow', CountVectorizer(analyzer=split_stopwords)),
                         ('tfidf', TfidfTransformer()),
                         ('classifier', SVC())])
    # Two sub-grids: linear kernel (C only) and RBF kernel (C + gamma).
    params = [
        {'classifier__C': [1, 10, 100, 1000], 'classifier__kernel': ['linear']},
        {'classifier__C': [1, 10, 100, 1000], 'classifier__gamma': [0.001, 0.0001],
         'classifier__kernel': ['rbf']},
    ]
    grid = GridSearchCV(
        pipeline,
        param_grid=params,  # parameters to tune via cross validation
        refit=True,         # refit on all data with the best found settings
        n_jobs=-1,
        scoring='accuracy',
        # Pre-0.18 scikit-learn API: labels passed to the splitter directly.
        cv=StratifiedKFold(label_train, n_folds=5),
    )
    svm_detector = grid.fit(msg_train, label_train)
    # Predict once and reuse for both reports (the original predicted twice).
    predictions = svm_detector.predict(msg_test)
    print("")
    print(":: Confusion Matrix")
    print("")
    print(confusion_matrix(label_test, predictions))
    print("")
    print(":: Classification Report")
    print("")
    print(classification_report(label_test, predictions))
    # NOTE(review): pickle stores the analyzer functions by module path; the
    # model must be unpickled where this module is importable under the same
    # name, or loading fails with an AttributeError.
    file_name = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl'
    with open(file_name, 'wb') as fout:
        cPickle.dump(svm_detector, fout)
    print('model written to: ' + file_name)
def predictmessage(message):
    """Classify *message* with both persisted models and log the NB verdict.

    Loads the pickled NB and SVM models from disk, records the Naive Bayes
    result as a TextEntry row ('SPAM' or 'HAM'), and returns a human-readable
    string carrying both predictions.

    Bug fix: the original leaked both file handles (``open()`` with no
    ``close()``) and opened the pickle files in text mode; pickles must be
    read in binary mode inside a ``with`` block.
    """
    with open('/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl', 'rb') as fin:
        nb_detector = cPickle.load(fin)
    with open('/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl', 'rb') as fin:
        svm_detector = cPickle.load(fin)
    nb_predict = nb_detector.predict([message])[0]
    svm_predict = svm_detector.predict([message])[0]
    # Only the Naive Bayes verdict decides the persisted result.
    if nb_predict == 'spam':
        TextEntry.objects.create(text_type='sms', text_content=message, text_result='SPAM')
    else:
        TextEntry.objects.create(text_type='sms', text_content=message, text_result='HAM')
    return 'SVM as ' + svm_predict + ' AND Naive Bayes as ' + nb_predict
class SpamCheck():
    """Facade used by the Django view: lazily trains models, then predicts."""

    def spamchecksmsResult(self, message):
        """Return the spam verdict for *message*, training models on first use.

        Fixes: ``== False`` replaced with the idiomatic ``not``; Python-2-only
        ``print`` statements replaced with single-argument ``print(...)``
        calls (identical output on Python 2, valid Python 3).
        """
        nb_model = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl'
        svm_model = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl'
        # Train lazily: only build a model whose pickle is not on disk yet.
        if not os.path.isfile(nb_model):
            print("Creating Naive Bayes Model.....")
            train_multinomial_nb(SPAMMESSAGES)
        if not os.path.isfile(svm_model):
            print("")
            print("Creating SVM Model.....")
            train_svm(SPAMMESSAGES)
        prediction = predictmessage(message)
        # NOTE(review): the comma makes this a 2-tuple, not a concatenated
        # string — preserved because the caller may render the tuple, but it
        # was probably meant to be string formatting; confirm with the view.
        output = 'Entered text is predicted by', prediction
        return output
(此处原有项目目录结构的截图。)以下是错误的回溯(Traceback):
AttributeError at /spamsmscheck/
'module' object has no attribute 'split_stopwords'
Request Method: POST
Request URL: http://127.0.0.1:8000/spamsmscheck/
Django Version: 1.11rc1
Exception Type: AttributeError
Exception Value:
'module' object has no attribute 'split_stopwords'
Exception Location: /home/gaurav/PycharmProjects/spamchecker/spamchecker/SmsSpamCheck.py in predictmessage, line 121
Python Executable: /usr/bin/python2.7
Python Version: 2.7.11
Python Path:
['/home/gaurav/PycharmProjects/spamchecker',
'/usr/lib/python2.7/site-packages/Django-1.11rc1-py2.7.egg',
'/usr/lib/python2.7/site-packages/pytz-2016.10-py2.7.egg',
'/home/gaurav/PycharmProjects/spamchecker',
'/usr/lib64/python27.zip',
'/usr/lib64/python2.7',
'/usr/lib64/python2.7/plat-linux2',
'/usr/lib64/python2.7/lib-tk',
'/usr/lib64/python2.7/lib-old',
'/usr/lib64/python2.7/lib-dynload',
'/usr/lib64/python2.7/site-packages',
'/usr/lib/python2.7/site-packages']
Server time: Sun, 17 Sep 2017 11:58:34 +0000
注意:我最初并没有在django项目中创建单独的应用(app)。我也尝试过把上面的代码放到django项目下的一个django应用(app)中,仍然得到同样的错误。