我正在用一个包含文本数据的文件创建词项(term)网络。网络已经成功生成,但它包含了文件中的所有单词(约 1281 个)。我只想用数据文件中出现次数最多的那些词来构建网络。以下是我尝试过的代码。
# Build and plot a term co-occurrence network that contains only the most
# frequent terms of a text file.
library(tm)
library(igraph)

# Number of most frequent terms kept as vertices; tune to taste.
top_n_terms <- 30L

text <- read.delim(
  file = "check.csv", header = FALSE, stringsAsFactors = FALSE
)

# Hand tm a plain character vector. Passing the data frame itself would
# create one document per *column*, which makes every term co-occur with
# every other term.
# NOTE(review): assumes each cell of the file is one document - confirm
# against the actual layout of check.csv.
documents <- as.character(unlist(text, use.names = FALSE))
documents <- documents[!is.na(documents) & nzchar(documents)]

a <- Corpus(VectorSource(documents), readerControl = list(language = "en"))

# Cleaning. Plain character functions (gsub, tolower) must be wrapped in
# content_transformer() so tm_map() keeps returning proper documents.
a <- tm_map(a, removeNumbers)
a <- tm_map(a, content_transformer(gsub), pattern = ",", replacement = " ")
a <- tm_map(a, removePunctuation)
a <- tm_map(a, stripWhitespace)
a <- tm_map(a, content_transformer(tolower))
a <- tm_map(a, removeWords, stopwords("english"))
a <- tm_map(a, removeWords, c("cox", "can", "cant", "get", "set"))

# Pull the cleaned text back out as one string per document.
cleaned <- vapply(
  seq_len(length(a)),
  function(i) paste(as.character(a[[i]]), collapse = " "),
  character(1)
)

# Tokenise on whitespace, dropping the empty tokens left by removeWords().
tokens_by_doc <- lapply(
  strsplit(cleaned, "[[:space:]]+"),
  function(tokens) tokens[nzchar(tokens)]
)
all_tokens <- unlist(tokens_by_doc, use.names = FALSE)

# Stem word by word, then complete each distinct stem once against the
# unstemmed vocabulary of the whole corpus, so that variants of a word
# collapse to the same completed form in every document.
# stemCompletion() expects a vector of stems, not a whole document, which
# is why it is not applied through tm_map().
stems_by_doc <- lapply(tokens_by_doc, stemDocument, language = "english")
unique_stems <- unique(unlist(stems_by_doc, use.names = FALSE))

completed <- as.character(
  unname(stemCompletion(unique_stems, dictionary = all_tokens))
)
# Keep the bare stem when the dictionary offers no completion.
unresolved <- is.na(completed) | !nzchar(completed)
completed[unresolved] <- unique_stems[unresolved]
completion_of <- setNames(completed, unique_stems)

completed_docs <- vapply(
  stems_by_doc,
  function(stems) paste(unname(completion_of[stems]), collapse = " "),
  character(1)
)

tdm <- TermDocumentMatrix(Corpus(VectorSource(completed_docs)))
termDocMatrix <- as.matrix(tdm)

# Keep only the top_n_terms most frequent terms (by total count over all
# documents) before building the network.
term_frequency <- sort(rowSums(termDocMatrix), decreasing = TRUE)
frequent_terms <- names(head(term_frequency, top_n_terms))
termDocMatrix <- termDocMatrix[frequent_terms, , drop = FALSE]

if (nrow(termDocMatrix) == 0L) {
  stop("No terms left after cleaning; nothing to plot.", call. = FALSE)
}

# Term-term adjacency matrix: entry (i, j) accumulates, over documents,
# the product of the counts of term i and term j.
termMatrix <- termDocMatrix %*% t(termDocMatrix)

# graph.adjacency() and layout.fruchterman.reingold() are the legacy names
# of graph_from_adjacency_matrix() and layout_with_fr().
g <- graph.adjacency(termMatrix, weighted = TRUE, mode = "undirected")
g <- simplify(g) # remove loops
V(g)$name
V(g)$label <- V(g)$name
V(g)$degree <- degree(g)

# Fixed seed so the force-directed layout is reproducible.
set.seed(3952)
layout1 <- layout.fruchterman.reingold(g)
plot(g, layout = layout1)
我该如何只保留出现频率最高的词来构建并绘制这个网络?