我正在尝试在 RStudio(MacBook Pro,R 版本 3.4.1)中对一个大型文本语料库进行线性判别分析。参考其他问题中的建议,我已将 ppsize 设为最大值。
# Raise R's limit on the number of nested expressions that may be evaluated
# to 500000, the largest value options() accepts for this setting.
# Note: this is a separate limit from the pointer protection stack size
# (ppsize), which can only be set at startup with --max-ppsize.
options(expressions = 500000L)
下面是一个可重现的示例,它会抛出相同的错误。有什么建议吗?
library(tm)
library(MASS)
# Download the text and read it into a character vector, splitting the
# input on newlines.
shakespeare.v <- scan(
  "https://archive.org/stream/in.ernet.dli.2015.182303/2015.182303.Shakespeare-Complete-Works_djvu.txt",
  what = "character",
  sep = "\n"
)
# Wrap the raw text in a tm corpus; VectorSource() treats each element of
# shakespeare.v as one document.
docs <- Corpus(VectorSource(shakespeare.v))

# Content transformer that replaces every match of the regular expression
# `pattern` in the text `x` with a single space.
# The result is returned as the value of the last expression; the original
# had `return` on a line of its own, so return() was never actually called.
toSpace <- content_transformer(function(x, pattern) {
  gsub(pattern, " ", x)
})

# Normalise the text: lower-case it, then drop punctuation and digits.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)

# Blank out boilerplate words.
# NOTE(review): these patterns are unanchored, so they also match inside
# longer words (e.g. "page" within "pageant") -- confirm that is intended.
docs <- tm_map(docs, toSpace, "page")
docs <- tm_map(docs, toSpace, "filename")

# Collapse runs of whitespace last, so that the blanks introduced by the
# toSpace substitutions above are squeezed as well.
docs <- tm_map(docs, stripWhitespace)
# Pull the cleaned text out of the corpus as a plain character vector.
# gsub() coerces a non-character argument with as.character(); applied to
# the corpus object itself that does not give the plain document text, so
# the text is extracted explicitly first. Going through content() and
# as.character() handles both a character-vector content and a list of
# text documents.
docs.text.v <- unlist(lapply(content(docs), as.character), use.names = FALSE)

# Replace every character that is not a letter, digit, whitespace or
# apostrophe with a space, then split each text on runs of whitespace.
full.words.l <- gsub("[^[:alnum:][:space:]']", " ", docs.text.v)
full.words.l <- strsplit(full.words.l, "\\s+")
full.words.v <- unlist(full.words.l)

# strsplit() yields an empty string when a text starts with whitespace;
# drop those so they do not count towards the 3000-word chunks.
full.words.v <- full.words.v[nzchar(full.words.v)]

# Cut the word vector into consecutive chunks of at most 3000 words:
# word i goes to chunk ceiling(i / 3000).
full.l.3000 <- split(
  full.words.v,
  ceiling(seq_along(full.words.v) / 3000)
)

# Flattened copy of the chunked words.
chunks.v.3000 <- unlist(full.l.3000)
# VectorSource() interprets each element of its input as one document, so
# every chunk has to be a single string. full.l.3000 holds a vector of
# separate words per chunk; join each chunk's words with spaces first.
chunk.text.v.3000 <- vapply(
  full.l.3000,
  paste,
  character(1),
  collapse = " "
)
chunks.c.3000 <- Corpus(VectorSource(chunk.text.v.3000))

# NOTE(review): matrix.3000 is not read by any later line visible here; it
# is kept unchanged in case code outside this view relies on it.
matrix.3000 <- matrix(chunks.c.3000)

# Build the document-term matrix (one row per chunk, one column per term),
# then convert it to a dense matrix and to a data frame.
dtm.3000 <- DocumentTermMatrix(chunks.c.3000)
m.3000 <- as.matrix(dtm.3000)
df.3000 <- as.data.frame(m.3000)
# Fit a linear discriminant model with MASS::lda() on the term-count data
# frame df.3000, requesting leave-one-out cross-validation (CV = TRUE).
# NOTE(review): the left-hand side of the formula is `full.l.3000`, the list
# of word chunks produced by split(). lda() documents the response as the
# grouping factor, so a per-chunk class label is presumably what is needed
# here -- confirm what the grouping should be.
# NOTE(review): `~ .` expands to one term per column of df.3000. lda() also
# has a matrix interface, lda(x, grouping, ...), which avoids building a
# formula over every term column -- consider it once the grouping is known.
lda.3000<-lda(full.l.3000~.,
df.3000, na.action = "na.omit", CV=TRUE)