我试图获取每个帖子中使用的单词的频率,并将它们作为列添加到训练数据中,下面的代码适当地运行1个单词,对于第二个单词,它会抛出此错误。
word_frequency <- function(w){
for(i in 2:1000){
review_text <- paste(Train$Post[i:i], collapse=" ")
review_source <- VectorSource(review_text)
corpus <- Corpus(review_source)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
dtm <- DocumentTermMatrix(corpus)
dtm2 <- as.matrix(dtm)
frequency <- colSums(dtm2)
frequency <- frequency[names(frequency) == w]
frequency <- as.list(frequency)
freq<-rbind(freq, frequency)
freq.withNA <- sapply(freq, function(x) ifelse(x == "NULL", NA, x))
}
return(freq)
}
Train <- Training[1:1000,]
for (w in wordlist) {
freq <- as.integer()
new <- word_frequency(w)
Train <- cbind(Train, new)
print(paste("Completed word ", w, sep=""))
}