我在 django-python 应用程序中收到了属性错误:'module' 对象没有属性 'split_stopwords'

时间:2017-09-17 04:34:07

标签: python django pycharm attributeerror naivebayes

请不要将此问题标记为'重复'。我已经尝试了所有其他解决方案,但我仍然收到此错误。

朋友们,我正在使用 Pycharm 中的 django-python 开发一个短信垃圾邮件检查程序 Web 应用。我遇到了"属性错误"(AttributeError)。当我在终端上运行这个程序时它工作正常,但是当我在 Pycharm 里运行时就会收到这个错误。有人能解答这个问题吗?

SmsSpamCheck.py文件

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import wordpunct_tokenize
import cPickle
import csv
import os
import pandas as pd
from django.shortcuts import render
from sklearn.cross_validation import StratifiedKFold, train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from textblob import TextBlob
from spamchecker.models import TextEntry

# Make the project-bundled corpus directory visible to NLTK's data loader
# so stopwords can be found without a system-wide NLTK download.
nltk.data.path.append('nltk_data')
# Load the English stopword list once at import time.
# NOTE(review): this rebinds the imported `stopwords` MODULE name to a set,
# shadowing nltk.corpus.stopwords for the rest of this module. It works, but
# renaming the set (e.g. STOPWORDS) would be clearer — requires updating
# split_stopwords() as well.
stopwords = set(stopwords.words('english'))
# Labeled SMS corpus: tab-separated rows of (label, message), e.g. ham/spam.
SPAMMESSAGES = pd.read_csv('/home/gaurav/PycharmProjects/spamchecker/spamchecker/SMSSpamCollection', sep='\t',quoting=csv.QUOTE_NONE, names=["label", "message"])


# Preprocessing
def split_tokens(message):
    """Decode *message* from UTF-8 and return its TextBlob word tokens."""
    decoded = unicode(message, 'utf8')
    blob = TextBlob(decoded)
    return blob.words

def split_lemmas(message):
    """Decode and lowercase *message*, then return the lemma of each token."""
    blob = TextBlob(unicode(message, 'utf8').lower())
    return [token.lemma for token in blob.words]

def split_stopwords(message):
    """Decode, lowercase and lemmatize *message*, dropping English stopwords.

    Tokens are filtered against the module-level ``stopwords`` set before
    lemmatization, so the comparison sees the lowercased surface form.
    """
    blob = TextBlob(unicode(message, 'utf8').lower())
    return [tok.lemma for tok in blob.words if tok not in stopwords]

# Training
def train_multinomial_nb(messages):
    """Train a Multinomial Naive Bayes SMS spam classifier and pickle it.

    ``messages`` is a pandas DataFrame with 'label' and 'message' columns
    (as loaded into SPAMMESSAGES). A bag-of-words -> tf-idf -> NB pipeline
    is tuned via grid search over the tokenizer and idf settings, evaluated
    on a 20% hold-out split, and the best refit estimator is written to a
    hard-coded .pkl path.
    """
    # split dataset for cross validation: 80% train, 20% held-out test


    msg_train, msg_test, label_train, label_test = train_test_split(messages['message'], messages['label'],
                                                                    test_size=0.2)
    # create pipeline: counts -> tf-idf weighting -> Multinomial Naive Bayes
    pipeline = Pipeline([('bow', CountVectorizer(analyzer=split_stopwords)), ('tfidf', TfidfTransformer()),
                         ('classifier', MultinomialNB())])
    # pipeline parameters to automatically explore and tune
    # (the analyzer grid references module-level functions; pickling the
    # fitted model therefore stores them by module-qualified name)
    params = {
        'tfidf__use_idf': (True, False),
        'bow__analyzer': (split_lemmas, split_tokens, split_stopwords),
    }
    grid = GridSearchCV(
        pipeline,
        params,  # parameters to tune via cross validation
        refit=True,  # fit using all data, on the best detected classifier
        n_jobs=-1,
        scoring='accuracy',
        cv=StratifiedKFold(label_train, n_folds=5),
    )
    # train (grid.fit returns the fitted GridSearchCV itself)
    nb_detector = grid.fit(msg_train, label_train)
    print ""
    predictions = nb_detector.predict(msg_test)
    print ":: Confusion Matrix"
    print ""
    print confusion_matrix(label_test, predictions)
    print ""
    print ":: Classification Report"
    print ""
    print classification_report(label_test, predictions)
    # save model to pickle file
    file_name = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl'
    with open(file_name, 'wb') as fout:
        cPickle.dump(nb_detector, fout)
    print 'model written to: ' + file_name

def train_svm(messages):
    """Train an SVM SMS spam classifier and pickle it.

    Mirrors train_multinomial_nb: same DataFrame input, same bag-of-words ->
    tf-idf front end, but the final estimator is an SVC tuned over C,
    gamma and kernel. The refit best model is written to a hard-coded
    .pkl path.
    """
    # split dataset for cross validation: 80% train, 20% held-out test
    msg_train, msg_test, label_train, label_test = train_test_split(messages['message'], messages['label'],
                                                                    test_size=0.2)
    # create pipeline: counts -> tf-idf weighting -> support vector classifier
    pipeline = Pipeline(
        [('bow', CountVectorizer(analyzer=split_stopwords)), ('tfidf', TfidfTransformer()), ('classifier', SVC())])
    # pipeline parameters to automatically explore and tune
    # (two sub-grids: linear kernel over C, and rbf kernel over C x gamma)
    params = [
        {'classifier__C': [1, 10, 100, 1000], 'classifier__kernel': ['linear']},
        {'classifier__C': [1, 10, 100, 1000], 'classifier__gamma': [0.001, 0.0001], 'classifier__kernel': ['rbf']},
    ]
    grid = GridSearchCV(
        pipeline,
        param_grid=params,  # parameters to tune via cross validation
        refit=True,  # fit using all data, on the best detected classifier
        n_jobs=-1,
        scoring='accuracy',
        cv=StratifiedKFold(label_train, n_folds=5),
    )
    # train (grid.fit returns the fitted GridSearchCV itself)
    svm_detector = grid.fit(msg_train, label_train)
    print ""
    print ":: Confusion Matrix"
    print ""
    print confusion_matrix(label_test, svm_detector.predict(msg_test))
    print ""
    print ":: Classification Report"
    print ""
    print classification_report(label_test, svm_detector.predict(msg_test))
    # save model to pickle file
    file_name = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl'
    with open(file_name, 'wb') as fout:
        cPickle.dump(svm_detector, fout)
    print 'model written to: ' + file_name


def predictmessage(message):
    """Classify *message* with both persisted models and log the NB verdict.

    Loads the pickled Naive Bayes and SVM detectors, predicts the label of
    the single message, records a TextEntry row based on the Naive Bayes
    result, and returns a human-readable summary string.

    Raises IOError if either model file is missing (callers are expected to
    have trained the models first — see SpamCheck.spamchecksmsResult).
    """
    nb_path = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl'
    svm_path = '/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl'
    # Use context managers and binary mode: the original bare open() calls
    # leaked both file handles and read the pickles in text mode.
    with open(nb_path, 'rb') as fin:
        nb_detector = cPickle.load(fin)
    with open(svm_path, 'rb') as fin:
        svm_detector = cPickle.load(fin)

    nb_predict = nb_detector.predict([message])[0]
    svm_predict = svm_detector.predict([message])[0]

    # NOTE(review): only the Naive Bayes verdict decides what is persisted;
    # the SVM result is reported in the return string but otherwise ignored
    # — confirm this asymmetry is intended.
    if nb_predict == 'spam':
        TextEntry.objects.create(text_type='sms', text_content=message, text_result='SPAM')
    else:
        TextEntry.objects.create(text_type='sms', text_content=message, text_result='HAM')

    return 'SVM as ' + svm_predict + ' AND Naive Bayes as ' + nb_predict

class SpamCheck():
    """Facade used by the Django view layer to classify an SMS message."""

    def spamchecksmsResult(self, message):
        """Train any missing model on first use, then classify *message*.

        Returns the prediction wrapped with a short descriptive prefix.
        """
        # check if models exist, if not run training (lazy one-time training)
        if (os.path.isfile('/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_nb_model.pkl') == False):
            print "Creating Naive Bayes Model....."
            train_multinomial_nb(SPAMMESSAGES)

        if (os.path.isfile('/home/gaurav/PycharmProjects/spamchecker/spamchecker/sms_spam_svm_model.pkl') == False):
            print ""
            print "Creating SVM Model....."
            train_svm(SPAMMESSAGES)

        prediction = predictmessage(message)
        # NOTE(review): the comma makes ``output`` a 2-tuple
        # ('Entered text is predicted by', prediction), not a concatenated
        # string — likely a bug, but fixing it changes the return type that
        # the calling view may already render; confirm before changing.
        output = 'Entered text is predicted by', prediction
        return output

以下是我收到的回溯信息(Traceback):

AttributeError at /spamsmscheck/
'module' object has no attribute 'split_stopwords'
Request Method: POST
Request URL:    http://127.0.0.1:8000/spamsmscheck/
Django Version: 1.11rc1
Exception Type: AttributeError
Exception Value:    
'module' object has no attribute 'split_stopwords'
Exception Location: /home/gaurav/PycharmProjects/spamchecker/spamchecker/SmsSpamCheck.py in predictmessage, line 121
Python Executable:  /usr/bin/python2.7
Python Version: 2.7.11
Python Path:    
['/home/gaurav/PycharmProjects/spamchecker',
 '/usr/lib/python2.7/site-packages/Django-1.11rc1-py2.7.egg',
 '/usr/lib/python2.7/site-packages/pytz-2016.10-py2.7.egg',
 '/home/gaurav/PycharmProjects/spamchecker',
 '/usr/lib64/python27.zip',
 '/usr/lib64/python2.7',
 '/usr/lib64/python2.7/plat-linux2',
 '/usr/lib64/python2.7/lib-tk',
 '/usr/lib64/python2.7/lib-old',
 '/usr/lib64/python2.7/lib-dynload',
 '/usr/lib64/python2.7/site-packages',
 '/usr/lib/python2.7/site-packages']
Server time:    Sun, 17 Sep 2017 11:58:34 +0000

注意:我还没有在这个 django 项目中创建独立的应用(app)。我也尝试过在 django 项目下新建一个 django-app 并把上面的代码放进去,结果得到同样的错误。

0 个答案:

没有答案