This is a sample of my data in the R viewer, with the scores at the side. 我是R语言的新手,我很想了解如何创建正面和负面的词云。
我已经加载了词典并给我的推文打了分。这是我的脚本:
library(twitteR)
library(stringr)
library(tm)
library(ggplot2)
library(wordcloud)
library(SnowballC)
library(wordcloud2)
# Load the previously saved tweets object from disk (RDS format).
tweets <- readRDS("IKEAUKSupport_tweets.az")
# Apply stemming to every document of the corpus.
# NOTE(review): `tweet_clean` is used here before any visible assignment, and
# `tweets` is not referenced again in this excerpt -- presumably the corpus
# construction/cleaning steps were omitted; confirm before running.
tweet_clean <- tm_map(tweet_clean, stemDocument)
# Build the document-term matrix from the stemmed corpus.
DTM <- DocumentTermMatrix(tweet_clean)
# Bare expression: auto-prints the matrix summary when run at top level.
DTM
# Persist the document-term matrix in RDS format.
saveRDS(DTM, file = "DTM.az")
# Show the first 10 documents x 20 terms as a quick sanity check.
inspect(DTM[1:10, 1:20])
# Convert to an ordinary matrix so it can be written out as CSV.
m <- as.matrix(DTM)
write.csv(m, file = "DTM.csv")
# Read the sentiment lexicons as whitespace-separated character tokens.
# comment.char="" disables comment handling, so every line of the file is
# tokenised.
# NOTE(review): if these are the Hu & Liu opinion-lexicon files, their header
# lines start with ';' and would be read in as "words" -- consider
# comment.char=";"; confirm against the actual files.
pos.words <- scan("positive-words.txt",what="character", comment.char="")
neg.words <- scan("negative-words.txt",what="character", comment.char="")
# Score each sentence by counting lexicon hits.
#
# For every element of `sentences` the text is stripped of punctuation,
# control characters and digits, lower-cased and split on whitespace. The
# score is (number of tokens found in `pos.words`) minus (number of tokens
# found in `neg.words`).
#
# Arguments:
#   sentences  Vector of texts to score.
#   pos.words  Character vector of positive terms.
#   neg.words  Character vector of negative terms.
#   .progress  Progress-bar name forwarded to plyr::laply() (default "none").
#
# Returns a data.frame with one row per sentence and two columns:
#   score  the sentiment score
#   text   the original, uncleaned sentence
#
# plyr and stringr are called through `::` rather than attached with
# require(): a missing package then fails immediately with a clear error, and
# calling this function no longer alters the caller's search path.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  # Scores a single sentence; the lexicons are captured from the enclosing call.
  score_one <- function(sentence) {
    # Clean up the sentence with regex-driven global substitution.
    cleaned <- gsub("[[:punct:]]", "", sentence)
    cleaned <- gsub("[[:cntrl:]]", "", cleaned)
    cleaned <- gsub("\\d+", "", cleaned)
    cleaned <- tolower(cleaned)

    # str_split() returns a list; flatten it to a plain character vector.
    words <- unlist(stringr::str_split(cleaned, "\\s+"))

    # %in% yields TRUE/FALSE per token, which sum() counts as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  scores <- plyr::laply(sentences, score_one, .progress = .progress)
  data.frame(score = scores, text = sentences)
}
# Score every tweet against the positive/negative lexicons and save the result.
# NOTE(review): `tweet_text` is not assigned anywhere in the visible script --
# presumably it is defined in an omitted step; confirm before running.
tweets.scores <- score.sentiment(tweet_text, pos.words, neg.words, .progress = "text")
write.csv(tweets.scores, "tweets_scores.csv")

# Frequency of each raw score.
table(tweets.scores$score)

# Bucket each tweet by the sign of its score. A missing score stays NA.
# (Label spelling fixed: previously "postive".)
traffic_light <- ifelse(
  tweets.scores$score < 0, "negative",
  ifelse(tweets.scores$score == 0, "neutral", "positive")
)

# Counts per sentiment bucket, then the same as percentages.
table(traffic_light)
round(prop.table(table(traffic_light)), 2) * 100
我真的很想知道如何创建带有标签的词云,以便将正面词和负面词分开显示。