我正在对从 Twitter 检索到的文本进行情感分析。我可以将文本(text)索引到 Solr 中,但无法将新增的情感字段(sentiment)一并索引到 Solr 中。请问添加新字段的正确方法是什么?
class Model:
    """Plain record pairing a tweet's text with its sentiment label.

    Attributes:
        text: The tweet text as passed in by the caller.
        sentiment: The sentiment label associated with ``text``.
    """

    def __init__(self, text, sentiment):
        """Store the given ``text`` and ``sentiment`` on the instance."""
        self.text = text
        self.sentiment = sentiment

    def __repr__(self):
        """Return a debugging representation showing both fields."""
        return (
            f"{type(self).__name__}"
            f"(text={self.text!r}, sentiment={self.sentiment!r})"
        )
class ModelEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize ``Model`` objects.

    Besides the ``default`` hook, the class also carries the tweet-cleaning,
    sentiment-classification and crawling helpers that appear alongside it.
    """

    # Matches, in order of preference at each position:
    #   1. an @mention made of ASCII letters/digits,
    #   2. any single character that is not an ASCII letter, digit, space
    #      or tab (this also strips punctuation and non-ASCII characters),
    #   3. a URL-like token of the form ``word://non-whitespace``.
    # Raw string: the escapes are meant for the regex engine, not Python.
    _TWEET_NOISE = re.compile(
        r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"
    )

    def clean_tweet(self, tweet):
        """Return ``tweet`` with mentions, URLs and special characters removed.

        Every match of the noise pattern is replaced by a single space, then
        runs of whitespace are collapsed and leading/trailing whitespace is
        dropped.

        Args:
            tweet: The raw tweet text.

        Returns:
            The cleaned text, with words separated by single spaces.
        """
        return ' '.join(self._TWEET_NOISE.sub(" ", tweet).split())

    def analyze_sentiment(self, tweet):
        """Classify ``tweet`` as 'positive', 'neutral' or 'negative'.

        The tweet is cleaned with ``clean_tweet`` and handed to ``TextBlob``;
        the label is chosen from the sign of the resulting polarity.

        Args:
            tweet: The raw tweet text.

        Returns:
            'positive' if polarity > 0, 'neutral' if polarity == 0,
            otherwise 'negative'.
        """
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity
        if polarity > 0:
            return 'positive'
        if polarity == 0:
            return 'neutral'
        return 'negative'

    def default(self, obj):
        """Serialize ``Model`` instances as a ``text``/``sentiment`` mapping.

        Any other object is delegated to the base encoder, which raises
        ``TypeError`` for types it cannot serialize.
        """
        if isinstance(obj, Model):
            return {"text": obj.text,
                    "sentiment": obj.sentiment}
        return super().default(obj)

    def crawldata(self, name, store=False):
        """Crawl the timeline of ``name`` page by page and label each tweet.

        Each tweet's text is classified with ``analyze_sentiment`` and
        appended to ``result`` as a ``Model``; crawling stops at the first
        empty page, after which ``result`` is dumped to JSON.

        Args:
            name: Passed as the first argument to ``api.user_timeline``.
            store: Not used in the visible part of this method.
        """
        # NOTE(review): the setup code was elided in the original snippet;
        # it must define ``i``, ``result`` and ``api`` before the loop.
        ...
        while True:
            print('Crawling page', i, 'of', name)
            tweets = api.user_timeline(name, count=200, page=i)
            for tweet in tweets:
                text = tweet.text
                sentiment = self.analyze_sentiment(text)
                result.append(Model(text, sentiment))
            # Advance once per page, not once per tweet.
            i += 1
            print('Crawled', len(tweets), 'tweets')
            if len(tweets) == 0:
                break
        result_json = json.dumps(result, cls=ModelEncoder)
        # NOTE(review): the remainder (what is done with ``result_json``,
        # presumably the indexing step) was elided in the original snippet.
        ...