编写一个函数从挖掘页面列表中提取顶部术语。当我使用lapply时它可以工作,但是当我改用parLapply时会给出一个错误——checkForRemoteErrors(val)中的错误:23个节点产生错误;第一个错误:'closure'类型的对象不可被子集化。 [以下代码]
# Extract the top LDA terms ("tags") and their normalized relevance
# scores for a single mined-page text.
#
# Args:
#   x: text of one mined page (coerced to character).
# Returns:
#   A single string: the top-10 words, then their renormalized
#   topic-word probabilities, space-separated.
#
# FIX: the worker error "object of type 'closure' is not subsettable"
# comes from `LDA$new` resolving to topicmodels::LDA (a plain function)
# when topicmodels is attached after text2vec on the cluster. Using the
# namespaced text2vec::LDA R6 generator makes the call unambiguous.
relevent_terms <- function(x) {
  stop_words <- c("cahched", "recipe", "similar", "food", "product",
                  "pdf", "html", "book", "result", "review", "view",
                  "webstaurantstore", "order", "sale", "cal", "image",
                  "free", "ebay")
  tokens <- as.character(x) %>%
    tolower() %>%
    removeWords(stop_words) %>%
    word_tokenizer()
  # NOTE(review): `dat$Description[x]` subsets Description by the text
  # itself, which yields NA unless Description is named by text --
  # confirm the intended document id (a row index is more likely meant).
  it <- itoken(tokens, ids = dat$Description[x], progressbar = FALSE)
  v <- create_vocabulary(it) %>%
    prune_vocabulary(term_count_min = 1)
  vectorizer <- vocab_vectorizer(v)
  dtm <- create_dtm(it, vectorizer, type = "dgTMatrix")
  # Namespaced so topicmodels::LDA (a closure) can never mask it.
  lda_model <- text2vec::LDA$new(n_topics = 1, doc_topic_prior = 0.1,
                                 topic_word_prior = 0.01)
  doc_topic_distr <- lda_model$fit_transform(
    x = dtm, n_iter = 1000, convergence_tol = 0.01,
    n_check_convergence = 25, progressbar = FALSE
  )
  tags <- paste(lda_model$get_top_words(n = 10, lambda = 0.3),
                collapse = " ")
  # topic_word_distribution is 1 x n_words; transpose it to one column
  # (V1), sort descending, keep the top 10 and renormalize to sum to 1.
  relevance <- as.data.frame(lda_model$topic_word_distribution)
  relevance <- as.data.frame(t(relevance))
  relevance <- relevance[order(-relevance$V1), ]  # drops to a numeric vector
  relevance <- relevance[1:10]
  relevance <- relevance / sum(relevance)
  paste(tags, paste(as.character(relevance), collapse = " "))
}
# Set up a PSOCK cluster, push the required packages and objects to the
# workers, and tag every cleaned page in parallel.
no_cores <- max(1L, detectCores() - 1L)  # guard: never request 0 workers
cl <- makeCluster(no_cores)
# Worker-side packages. topicmodels is attached BEFORE text2vec so that
# topicmodels::LDA (a plain function) cannot mask text2vec's LDA R6
# generator -- that masking is what produced the worker error
# "object of type 'closure' is not subsettable". Library paths must be
# single-line strings (the original literals contained embedded newlines).
clusterEvalQ(cl, {
  library(topicmodels, lib.loc = "C:\\R packages")
  library(magrittr, lib.loc = "C:\\R packages")
  library(stringr, lib.loc = "C:\\R packages")
  library(text2vec, lib.loc = "C:\\R packages")
  library(NLP, lib.loc = "C:\\R packages")
  library(tm, lib.loc = "C:\\R packages")
  library(pluralize, lib.loc = "C:\\R packages")
})
# Export the data and the worker function (dat must exist before this).
clusterExport(cl = cl, "dat")
clusterExport(cl = cl, "relevent_terms")
# parLapply returns a list with one tag string per page.
dat$Tags <- parLapply(cl = cl, X = dat$cleaned, fun = relevent_terms)
stopCluster(cl)  # always release the workers (previously leaked)