我正在尝试从自己创建的语料库中删除某些单词。以下是我目前的代码步骤。任何帮助都将不胜感激。
# Create a corpus ----
# Read every file matched by "<txt_directory>*" as UTF-8 text.
# NOTE(review): `txt_directory` is not defined in this snippet; because "*"
# is appended with paste0() (no separator), it presumably needs to end with
# a path separator -- confirm against where it is set.
data_texts <- readtext(paste0(txt_directory, "*"), encoding = "UTF-8")
data_corpus_Afghanistan <- corpus(data_texts)

# Transform corpus to dfm ----
# Build the document-feature matrix while dropping English stopwords,
# punctuation and numbers. The stray trailing comma after the last argument
# was removed: it passed an empty argument into `...`.
data_corpus_dfm <- dfm(
  data_corpus_Afghanistan,
  remove = stopwords("english"),
  remove_punct = TRUE,
  remove_numbers = TRUE
)
# Lowercase all features; keep_acronyms = FALSE lowercases acronyms as well.
data_corpus_dfm <- dfm_tolower(data_corpus_dfm, keep_acronyms = FALSE)
# Remove additional unwanted words ----
# After inspecting the dfm, drop further features that should not be kept.
# removeWords() is a tm helper, not a quanteda one, so it cannot be handed to
# dfm(); dfm_remove() drops the matching features from the existing dfm. This
# also keeps the stopword/punctuation/number cleaning applied earlier instead
# of rebuilding from the raw corpus.
words_to_remove <- c("anonymous")
data_corpus_dfm <- dfm_remove(data_corpus_dfm, words_to_remove)