R 文本挖掘中的阿拉伯语/波斯语/UTF-8 问题

时间:2015-01-23 15:00:53

标签: r

我想在 Windows 8.1 的 RStudio 中创建一个词云,下面是我的代码。我在这里读到了这段代码:

# Applies an encoding clean-up function to the corpus `yourCorpus` via
# tm_map(): each text is first converted with enc2utf8() and then passed
# through iconv() with sub = "byte", which replaces any byte that cannot be
# converted with its hex code in the form "<xx>".
tm_map(yourCorpus, function(x) iconv(enc2utf8(x), sub = "byte"))

据说它可以解决 RStudio 中的 UTF-8 问题。

library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Restore the saved OAuth credentials and register them with twitteR.
# NOTE(review): assumes the file "twitteR_credentials" defines `twitCred` --
# confirm against how the credentials were saved.
load("twitteR_credentials")
registerTwitterOAuth(twitCred)

# Search for up to 500 tweets matching the Persian query string.
# NOTE(review): `cainfo` is presumably the CA certificate bundle used for the
# HTTPS request -- confirm the file exists in the working directory.
mach_tweets <- searchTwitter("ایرانسل", n = 500, cainfo = "cacert.pem")

# Pull the text out of every returned status object. vapply() always yields a
# character vector (character(0) when nothing was found), whereas sapply()
# would silently return an empty list in that case.
mach_text <- vapply(mach_tweets, function(x) x$getText(), character(1))

# Wrap the tweet texts in a tm corpus, one document per tweet.
mach_corpus <- Corpus(VectorSource(mach_text))

# Normalise the encoding of every document BEFORE building the matrix.
# The original passed this iconv() expression as a stray third argument to
# TermDocumentMatrix(), where `x` is undefined and the argument is not
# accepted, so the call failed. Custom functions must be wrapped in
# content_transformer() to be usable with tm_map().
# `from`/`to` are given explicitly: with the defaults ("" = native locale)
# iconv() would interpret the UTF-8 bytes produced by enc2utf8() as the
# Windows code page and return unmarked text, which displays as gibberish.
# NOTE(review): this assumes the tweet text is correctly encoding-marked when
# it comes back from twitteR -- verify with Encoding(mach_text).
mach_corpus <- tm_map(
  mach_corpus,
  content_transformer(function(x) {
    iconv(enc2utf8(x), from = "UTF-8", to = "UTF-8", sub = "byte")
  })
)

# Build the term-document matrix with basic clean-up applied to each document.
# NOTE(review): the two stopwords are empty strings, presumably placeholders
# for real (Persian) stopwords -- fill them in or drop the option.
tdm <- TermDocumentMatrix(
  mach_corpus,
  control = list(
    removePunctuation = TRUE,
    stopwords = c("", ""),
    removeNumbers = TRUE,
    tolower = TRUE
  )
)

# Convert the term-document matrix into an ordinary matrix
# (rows are terms, columns are documents).
m <- as.matrix(tdm)

# Sum each term's counts over all documents, then order the totals so the
# most frequent term comes first.
word_freqs <- rowSums(m)
word_freqs <- sort(word_freqs, decreasing = TRUE)

# Frequency table used for plotting: one row per term with its total count.
dm <- data.frame(
  word = names(word_freqs),
  freq = word_freqs
)

# Draw the word cloud on the current graphics device; the most frequent words
# are placed first (random.order = FALSE) using the 8-colour "Dark2" palette.
wordcloud(
  dm$word, dm$freq,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)

# Render the same cloud into a 12 x 8 inch, 300 dpi PNG file. The drawing is
# wrapped in tryCatch(finally = ) so the PNG device is always closed, even if
# wordcloud() fails; otherwise a failed run would leave the device open and
# the output file unfinished.
png(
  "MachineLearningCloud.png",
  width = 12, height = 8, units = "in", res = 300
)
invisible(tryCatch(
  wordcloud(
    dm$word, dm$freq,
    random.order = FALSE,
    colors = brewer.pal(8, "Dark2")
  ),
  finally = dev.off()
))

但是当我运行它时,显示出来的却是乱码(gibberish)。

0 个答案:

没有答案