我一直在尝试根据 WhatsApp 聊天记录生成一个词云(wordcloud)。下面是我一直在使用的代码:
# Build a word cloud from an exported WhatsApp chat ("chat.txt").
#
# Pipeline: read the raw lines -> strip non-ASCII residue -> build a tm corpus
# -> normalise the text -> term/document matrix -> word frequencies -> plot.

# NOTE(review): setwd() in a script is fragile; kept so the relative path
# "chat.txt" below keeps resolving exactly as before.
setwd("E:/")

library(ggplot2)
library(tm)
library(wordcloud)
library(syuzhet)

# WhatsApp exports are typically UTF-8. Reading them in the native encoding
# turns emoji and curly quotes into mojibake tokens that no later cleaning
# step removes, so declare the encoding explicitly.
texts <- readLines("chat.txt", encoding = "UTF-8")

# Drop every character that has no ASCII representation (emoji, smart quotes,
# accented letters). sub = "" deletes them instead of returning NA.
# This assumes an English-language chat, consistent with stopwords("en").
texts <- iconv(texts, from = "UTF-8", to = "ASCII", sub = "")

docs <- Corpus(VectorSource(texts))
docs

# Replace every match of `pattern` with a single space.
# (The arguments must be passed as pattern, replacement, x; naming
# `pattern = " "` shifts `x` into the replacement slot and the call fails.)
trans <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, trans, "/")
docs <- tm_map(docs, trans, "@")
docs <- tm_map(docs, trans, "\\|")

# Standard normalisation: case, digits, stop words, punctuation, whitespace,
# and finally stemming.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("en"))
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, stemDocument)

# Term frequencies, most frequent first.
dtm <- TermDocumentMatrix(docs)
mat <- as.matrix(dtm)
v <- sort(rowSums(mat), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)

# Fixed seed so the cloud layout is reproducible between runs.
set.seed(1056)
wordcloud(
  words = d$word, freq = d$freq, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
然而,某些字符(如“Ëœ”、“ÂÔ、“Ëœ”等)仍然没有被删除,导致生成的词云失真。