Question

我目前正在使用tidyverse，twitteR，tm，RColorBrewer，stringr和wordcloud来刮擦Twitter数据。当我运行代码时，strwidth（words [i]，cex = size [i]，...）中的错误Error：尝试创建wordcloud时出现无效的'cex'值。有谁知道如何解决这一问题？下面是我的代码。谢谢。

serena <- searchTwitter("serena+williams", n = 3000,
                        lang = "en")

serena_df <- twListToDF(serena)
serena_df <- serena_df[,1]

tweet.corpus = Corpus(VectorSource(serena_df))

tweet.removeURL = function(x) gsub("http[^[:space:]]*", "", x)
tweet.removeATUser = function(x) gsub("@[a-z, A-Z]*", "", x)
tweet.removeEmoji = function(x) gsub("\\p{So}|\\p{Cn}", "", x, perl = TRUE)
tweet.removeSpecialChar = function(x) gsub("[^[:alnum:]]", "", x)

tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeURL))
inspect(tweet.corpus[1:4])
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeATUser))
inspect(tweet.corpus[1:4])
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeEmoji))
#tweet.corpus = tm_map(tweet.corpus, stripWhitespace)
tweet.corpus = tm_map(tweet.corpus, content_transformer(tweet.removeSpecialChar))
tweet.corpus = tm_map(tweet.corpus, removePunctuation)
tweet.corpus = tm_map(tweet.corpus, content_transformer(tolower))

tweet.corpus = tm_map(tweet.corpus, removeWords, c(stopwords("english"), "RT", "rt"))
tweet.corpus = tm_map(tweet.corpus, removeNumbers)


ap.tdm <- TermDocumentMatrix(tweet.corpus)
ap.m <- as.matrix(ap.tdm)
dim(ap.m)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)

pal2 <- brewer.pal(8, "Dark2")
png("serena.png", width = 1920, height = 1080)
wordcloud(ap.d$word, ap.d$freq, scale = c(8.2), min.freq = 3,
            max.words = 50, random.order = FALSE, rot.per = .15, colors = pal2)
  dev.off()

为什么不创建这种视觉效果？

0 个答案: