不同语言的数据框

时间:2019-01-15 15:14:53

标签: r

我在LDAVis的虚拟化过程中遇到问题。

我使用希腊语,由于编码问题,在界面中我看不到可能的单词。

下面是一个简单的示例:

df <- data.frame(freetext = c("Εδώ και αρκετό καιρό που συνεχίζουν","και τώρα που έγιναν"), stringsAsFactors = FALSE)

library(topicmodels)
library(dplyr)
library(stringi)
library(quanteda)
library(LDAvis)
library(stringr)
library(textcat)
library(data.table)

corpus <- corpus(df$freetext)
myDfm <- dfm(df$freetext)
burnin <- 500 #1000
iter <- 1000 #1500
keep <- 5

k <- 2
mods <- LDA(myDfm, k,
             method = "Gibbs",
             control = list(burnin = burnin,
                            iter = iter,
                            keep = keep))

testforjson <- function(mods, corpus, myDfm){     
     # Find required quantities
     phi <- posterior(mods)$terms %>% as.matrix
     theta <- posterior(mods)$topics %>% as.matrix
     vocab <- colnames(phi)
     doc_length <- vector()
     for (i in 1:ndoc(corpus)) {
         temp <- paste(corpus$documents$texts[[i]], collapse = ' ')
         doc_length <- c(doc_length, stri_count(temp, regex = '\\S'))
         print (i)
     }
     freq_matrix <- data.frame(ST = featnames(myDfm),
                               Freq = colSums(myDfm))
     # Convert to json
     json_lda <- LDAvis::createJSON(phi = phi, theta = theta,
                                    vocab = vocab,
                                    doc.length = doc_length,
                                    term.frequency = freq_matrix$Freq)

     return(json_lda)
 }

mystestjson <- testforjson(mods, corpus, myDfm)
serVis(mystestjson)

如何解决编码问题?

0 个答案:

没有答案