AttributeError:制作矩阵时 'list' 对象没有属性 'lower'

时间:2019-09-30 16:43:08

标签: python pandas tokenize lowercase sklearn-pandas

def generate_tokens(sentence_list, stop_words=None):
    """Return the first clean token of every entry in ``sentence_list``.

    Each entry has every character outside ``A-Z``/``a-z`` replaced by a
    space, is lower-cased and split on whitespace, and then has its stop
    words removed.  Only the first surviving token of an entry is kept;
    entries with no surviving token contribute nothing.

    Args:
        sentence_list: Iterable of strings to clean.
        stop_words: Optional collection of words to discard (compared
            against the lower-cased tokens).  When omitted, the
            module-level ``sw`` list is used.

    Returns:
        A flat list of strings (not a list of lists) holding at most one
        token per input entry, in input order.
    """
    if stop_words is None:
        # Looked up at call time, so ``sw`` may be assigned after this
        # function is defined (as the surrounding script does).
        stop_words = sw
    # Set membership is O(1); testing against the raw list rescans it for
    # every single token.
    excluded = frozenset(stop_words)
    non_letters = re.compile(r"[^A-Za-z]")

    final_tokens = []
    for sentence in sentence_list:
        words = non_letters.sub(" ", sentence).lower().split()
        # NOTE(review): only the first non-stop-word of each entry is kept,
        # exactly as before; any further tokens of the entry are dropped.
        # Confirm this is intended before relying on it for multi-word input.
        first_kept = next((w for w in words if w not in excluded), None)
        if first_kept is not None:
            final_tokens.append(first_kept)
    return final_tokens
from nltk.stem.porter import PorterStemmer

# Porter stemmer instance; none of the statements below use it.
stemmer = PorterStemmer()

# English stop-word list; generate_tokens reads it through the global name ``sw``.
sw = nltk.corpus.stopwords.words('english')

# Coerce every Content value to str and split it on whitespace, giving one
# list of raw words per row.
df["sentences"] = df["Content"].astype(str).str.split()

# Clean each row's word list; the result is one list of tokens per row.
df["clean_tokens"] = df["sentences"].apply(generate_tokens)
total = []

# Notebook-style preview of the first rows (the value is discarded in a script).
df.head()

from sklearn.feature_extraction.text import CountVectorizer

# Every row of df["clean_tokens"] is already a list of cleaned tokens, but
# CountVectorizer's default analyzer expects raw strings and calls .lower()
# on each document -- which raised
# "AttributeError: 'list' object has no attribute 'lower'".
# A callable analyzer replaces the built-in preprocessing and tokenizing, so
# each token list is counted exactly as it is.
vectorizer = CountVectorizer(analyzer=lambda tokens: tokens)

X = vectorizer.fit_transform(df["clean_tokens"])

print(X.toarray())

我该如何纠正?

AttributeError                            Traceback (most recent call last)
<ipython-input-101-3d88bc2d8f98> in <module>
      2 vectorizer = CountVectorizer()
      3 
----> 4 X = vectorizer.fit_transform(df["clean_tokens"])
      5 
      6 print(X.toarray())

~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
   1056 
   1057         vocabulary, X = self._count_vocab(raw_documents,
-> 1058                                           self.fixed_vocabulary_)
   1059 
   1060         if self.binary:

~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
    968         for doc in raw_documents:
    969             feature_counter = {}
--> 970             for feature in analyze(doc):
    971                 try:
    972                     feature_idx = vocabulary[feature]

~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
    350                                                tokenize)
    351             return lambda doc: self._word_ngrams(
--> 352                 tokenize(preprocess(self.decode(doc))), stop_words)
    353 
    354         else:

~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
    254 
    255         if self.lowercase:
--> 256             return lambda x: strip_accents(x.lower())
    257         else:
    258             return strip_accents

AttributeError: 'list' object has no attribute 'lower'

0 个答案:

没有答案