import re
from nltk.tokenize import word_tokenize
from string import punctuation
from nltk.corpus import stopwords
# NOTE: nltk.download('punkt') and nltk.download('stopwords') must have been run once
class pre:
    def __init__(self):
        # stop words, punctuation and the placeholder tokens are all filtered out later
        self._stopwords = set(stopwords.words('english') + list(punctuation) + ['AT_USER', 'URL'])

    def processtweets(self, list1):
        processedtweets = []
        for tweet in list1:
            processedtweets.append((self._processtweet(tweet["text"]), tweet["label"]))
        return processedtweets

    def _processtweet(self, tweet):
        tweet = tweet.lower()
        tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)  # replace links with URL
        tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)                        # replace @mentions with AT_USER
        tweet = re.sub(r'#([^\s]+)', r'\1', tweet)                          # drop the '#' but keep the hashtag word
        # r'' (raw string) keeps the backslashes from being treated as escape characters
        tweet = word_tokenize(tweet)  # tokenize the tweet into a list of words
        return [word for word in tweet if word not in self._stopwords]
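
For reference, a minimal sketch of how I expect processtweets to be called, assuming each tweet is a plain dict with a "text" string and a "label" (the sample tweet and label below are made up):

sample = [{"text": "Loving the new #Python update! https://docs.python.org @py_dev",
           "label": "positive"}]
p = pre()
print(p.processtweets(sample))
# expected output, roughly: [(['loving', 'new', 'python', 'update'], 'positive')]
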
Running this preprocessing gives the error AttributeError: 'Status' object has no attribute 'lower' at the line tweet = tweet.lower().
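
The traceback seems to say that a tweepy Status object, not a plain string, is reaching _processtweet. Assuming the tweets were collected with tweepy (a guess based on 'Status' in the error) and that status.text holds the tweet text, a sketch of mapping them to the dict shape processtweets expects would be:

# Assumption: the raw tweets are tweepy Status objects; convert them to the
# plain-dict shape processtweets expects before preprocessing.
def statuses_to_dicts(statuses, label):
    return [{"text": status.text, "label": label} for status in statuses]

# hypothetical usage, where raw_statuses is whatever the tweepy API call returned:
# tweets = statuses_to_dicts(raw_statuses, "positive")
# processed = pre().processtweets(tweets)
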