我目前正在使用tidyverse,twitteR,tm,RColorBrewer,stringr和wordcloud来刮擦Twitter数据。当我运行代码时,strwidth(words [i],cex = size [i],...)中的错误Error:尝试创建wordcloud时出现无效的'cex'值。有谁知道如何解决这一问题?下面是我的代码。谢谢。
serena <- searchTwitter("serena+williams", n = 3000,
lang = "en")
serena_df <- twListToDF(serena)
serena_df <- serena_df[,1]
tweet.corpus = Corpus(VectorSource(serena_df))
tweet.removeURL = function(x) gsub("http[^[:space:]]*", "", x)
tweet.removeATUser = function(x) gsub("@[a-z, A-Z]*", "", x)
tweet.removeEmoji = function(x) gsub("\\p{So}|\\p{Cn}", "", x, perl = TRUE)
tweet.removeSpecialChar = function(x) gsub("[^[:alnum:]]", "", x)
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeURL))
inspect(tweet.corpus[1:4])
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeATUser))
inspect(tweet.corpus[1:4])
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeEmoji))
#tweet.corpus = tm_map(tweet.corpus, stripWhitespace)
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeSpecialChar))
tweet.corpus = tm_map(tweet.corpus, removePunctuation)
tweet.corpus = tm_map(tweet.corpus, content_transformer(tolower))
tweet.corpus = tm_map(tweet.corpus, removeWords, c(stopwords("english"), "RT", "rt"))
tweet.corpus = tm_map(tweet.corpus, removeNumbers)
ap.tdm <- TermDocumentMatrix(tweet.corpus)
ap.m <- as.matrix(ap.tdm)
dim(ap.m)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
pal2 <- brewer.pal(8, "Dark2")
png("serena.png", width = 1920, height = 1080)
wordcloud(ap.d$word, ap.d$freq, scale = c(8.2), min.freq = 3,
max.words = 50, random.order = FALSE, rot.per = .15, colors = pal2)
dev.off()