我正在尝试创建一个术语文档矩阵,但收到的错误如下:
Error in simple_triplet_matrix(i, j, v, nrow = length(terms), ncol = length(corpus), :
'i, j' invalid
以下是我正在运行的代码
library(devtools)
library(twitteR)
library(tm)
library(ggplot2)
library(stringr)
api_key <- "XYZ"
api_secret <- "ABC"
access_token <- "DEF"
access_token_secret <- "GHI"
setup_twitter_oauth(api_key,api_secret,access_token,access_token_secret)
until<-as.character(Sys.Date())
since<- as.character(Sys.Date()-7)
tweetsnew<- searchTwitter("#YesMaybelline",lang='en',n=5000,since=since,
until = until)
tweetsnew.df <- twListToDF(tweetsnew)
j<-1
HashTagsList<-c()
HashTags<-str_extract_all(tweetsnew.df$text,"#\\S+")
HashTags<-HashTags[!HashTags %in% c("character(0)")]
while (j<=length(HashTags)){
HashTagsList<-c(HashTagsList,HashTags[[j]])
j<-j+1
}
HashTagsList<- gsub("#", "", HashTagsList)
HashTagsList<-unique(HashTagsList)
HashTagsList<-gsub("[^[:alnum:] ]", "", HashTagsList)
k<-1
HandleTagsList<-c()
HandleTags<-str_extract_all(tweetsnew.df$text,"@\\S+")
HandleTags<-HandleTags[!HandleTags %in% c("character(0)")]
while (k<=length(HandleTags)){
HandleTagsList<-c(HandleTagsList,HandleTags[[k]])
k<-k+1
}
HandleTagsList<- gsub("@", "", HandleTagsList)
HandleTagsList<-unique(HandleTagsList)
HandleTagsList<-gsub("[^[:alnum:] ]", "", HandleTagsList)
tweetsnew.df$text<-gsub("#\\S+", "", tweetsnew.df$text)
tweetsnew.df$text<-gsub("@\\S+", "", tweetsnew.df$text)
tweetsnew.df$text<-gsub("http\\S+", "", tweetsnew.df$text)
Tweetsnew.df<-subset(tweetsnew.df,isRetweet=="FALSE")
Tweetsnew.df$text<-gsub("[^[:alpha:] ]", " ", Tweetsnew.df$text)
Tweetsnew.df$text<-tolower(Tweetsnew.df$text)
myCorpus <-Corpus(VectorSource(Tweetsnew.df$text))
myStopwords<-c(stopwords("english"),"maybelline","https","like","bring","make","thought","please","maybe",
"know","just","want","wearing","really","last","better","best","first")
myCorpus<-tm_map(myCorpus,removeWords,myStopwords)
myCorpus<-tm_map(myCorpus,removeWords,HashTagsList)
myCorpus<-tm_map(myCorpus,removeWords,HandleTagsList)
myCorpus <- tm_map(myCorpus, PlainTextDocument)
myTdm<-TermDocumentMatrix(myCorpus,control=list(wordLengths=c(3,13)))
在此错误未到来之前突然悄悄进入。它发生在运行上面的最后一行代码之后(myTdm&lt; -TermDocumentMatrix(myCorpus,control = list(wordLengths = c(3,13))))。我无法弄清楚原因。任何帮助表示赞赏。