parLapply函数返回错误(lapply可正常工作)

时间:2017-10-19 16:24:02

标签: r functional-programming

编写一个函数从挖掘页面列表中提取顶部术语。当我使用lapply时它可以工作,但是当我使用parLapply时,给出了错误:checkForRemoteErrors(val)中的错误:23个节点产生错误;第一个错误:'closure'类型的对象不是子集(object of type 'closure' is not subsettable)。 [以下代码]

# Extract the top-10 LDA terms (and their normalized topic-word weights)
# for one mined page, returned as a single space-separated string.
# Relies on magrittr (%>%), tm (removeWords) and text2vec (word_tokenizer,
# itoken, create_vocabulary, vocab_vectorizer, create_dtm, LDA).
# NOTE(review): depends on a global data frame `dat` — on parallel workers
# this must be exported, otherwise `dat$...` can resolve to some function
# named `dat` and raise "object of type 'closure' is not subsettable",
# which matches the reported parLapply error. Verify the export.
relevent_terms<-function(x){
# Lower-case the raw text and strip boilerplate/stop terms before tokenizing.
tokens = as.character(x) %>% 
tolower %>% 
removeWords(c("cahched","recipe","similar","food","product","pdf","html","book","result","review","view","webstaurantstore","order","sale","cal","image","free","ebay"))%>%
word_tokenizer

# Build a text2vec token iterator for this single document.
# NOTE(review): `ids = dat$Description[x]` indexes Description by `x`,
# which here is the document text, not a row index — presumably an integer
# index or document id was intended; confirm against the caller.
it = itoken(tokens, ids = dat$Description[x], progressbar = FALSE)

# Vocabulary over this one document; term_count_min = 1 keeps every term.
v = create_vocabulary(it) %>% 
prune_vocabulary(term_count_min = 1)

# Document-term matrix in sparse triplet form, as required by text2vec's LDA.
vectorizer = vocab_vectorizer(v)
dtm = create_dtm(it, vectorizer, type = "dgTMatrix")


# Single-topic LDA: effectively ranks terms by relevance within the document.
lda_model = LDA$new(n_topics = 1, doc_topic_prior = 0.1, topic_word_prior = 
0.01)
doc_topic_distr = lda_model$fit_transform(x = dtm, n_iter = 1000, 
                                        convergence_tol = 0.01, 
n_check_convergence = 25, progressbar = FALSE)

# Top 10 terms (lambda = 0.3 favours topic-specific over frequent words).
tags<-paste(lda_model$get_top_words(n = 10, lambda = 0.3),collapse = " ")

# Topic-word distribution is 1 x V; transpose to one column (V1), sort
# descending, keep the 10 largest weights and renormalize them to sum to 1.
# NOTE(review): `relevance[order(-relevance$V1),]` on a one-column
# data.frame drops to a plain numeric vector, so `relevance[1:10]` then
# takes the first 10 elements — works, but fragile; confirm intended.
relevance<-as.data.frame(lda_model$topic_word_distribution)
relevance<-as.data.frame(t(relevance))
relevance<-relevance[order(-relevance$V1),]
relevance<-relevance[1:10]
relevance<-relevance/(sum(relevance))

# Result: "term1 ... term10 w1 ... w10" as one string (the function's value).
paste(tags,paste(as.character(relevance),collapse = " "))


}

# Set up a PSOCK cluster with one worker per core, leaving one core free
# for the OS. detectCores()/makeCluster() come from the `parallel`
# package — assumed to be loaded earlier in the script (TODO confirm).
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores)

# Load the text-mining packages on every worker from the custom library.
# The lib.loc path must be one unbroken string: in the original it was
# split across physical lines ("c:\\R <newline> packages"), which made the
# path wrong and the library() calls fail on the workers.
clusterEvalQ(cl, {
  lib <- "C:\\R packages"
  library(magrittr, lib.loc = lib)
  library(stringr, lib.loc = lib)
  library(text2vec, lib.loc = lib)
  library(NLP, lib.loc = lib)
  library(tm, lib.loc = lib)
  library(pluralize, lib.loc = lib)
  library(topicmodels, lib.loc = lib)
})

# Ship the data frame and the worker function to every node in one call;
# without `dat` on the workers, `dat$...` inside relevent_terms resolves
# to a function and triggers "'closure' is not subsettable".
clusterExport(cl = cl, c("dat", "relevent_terms"))

# Score every cleaned document in parallel; parLapply returns a list,
# matching the original lapply-based behaviour.
dat$Tags <- parLapply(cl = cl, X = dat$cleaned, fun = relevent_terms)

# Release the worker processes when done — the original script leaked them.
stopCluster(cl)

0 个答案:

没有答案