创建 DTM 时出现以下错误:
UseMethod("mutate_") 中的错误:没有适用于 'mutate_' 的方法可应用于 "character" 类的对象
# Build a document-term matrix (DTM) from the charge descriptions.

# Clean the raw charge descriptions; alphanum = TRUE is forwarded to clean_text().
text <- charges.df$charge.desc %>%
  clean_text(alphanum = TRUE)

# One row per document. tibble() has no `stringsAsFactors` argument: passing it
# would silently add a column literally named "stringsAsFactors", so it is
# omitted. seq_along() also stays correct when `text` is empty, unlike
# 1:length(text).
textdf <- tibble(docID = seq_along(text), text = text)

# using just a 1000 doc sample for demo purposes
corpus_df <- textdf %>%
  dplyr::sample_n(1000)

system.time({
  charge.dtm <- corpus_df$text %>%
    # routine to build bigram tokens with "_"
    replace_bigram(min_freq = 2) %>%
    # NOTE(review): the whole replace_bigram() result is piped on, instead of
    # extracting `$text` first. Handing casting_dtm() a bare character vector
    # looks like the cause of "no applicable method for 'mutate_' applied to
    # an object of class 'character'" -- presumably casting_dtm() expects a
    # data frame with a `text` column; confirm against its definition.
    casting_dtm() %>% # dtm is cast using tidytext alone
    # processing DTMs for compactness
    preprocess_dtm(min_occur = 0.01, max_occur = 0.90) %>% # prune dtm colms afap
    nonempty_dtm() # routine to clean DTMs of empty rows/colms
}) # original timing note: t = 41.5 secs for 10k documents (only 1000 sampled here)

dim(charge.dtm)