我正在学习 R 中的 wordcloud,到目前为止只实现了最基本的功能。我想做的是:按位置(Location)分别显示文字云。例如,假设我有
如下数据(两列分别为 TEXT 和 LOCATION):
True or false? link(#Addition, #Classification) NewYork,USA
Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics Norwich,UK
Biologists do have a sense of humor, especially computational bio people France
Semantic Inference using #Chemogenomics Data for Drug Discovery London,UK
这是我目前使用的基本 wordcloud 代码:
library(tm)
library(SnowballC)
library(wordcloud)
# Build a single word cloud from every text in the sample data.

# Sample texts and the location attached to each one.
DATA <- c(
  "True or false? link(#Addition, #Classification) ",
  "Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics",
  " Biologists do have a sense of humor, especially computational bio people",
  "Semantic Inference using #Chemogenomics Data for Drug Discovery"
)
Location <- c("NewYork,USA", "Norwich,UK", " France", "London,UK")

# stringsAsFactors = FALSE keeps the text as character on R < 4.0 as well.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Text clean-up: lower-case, strip punctuation and digits, drop English
# stop words, then stem. The former tm_map(..., PlainTextDocument) step is
# removed on purpose: tolower is already wrapped in content_transformer(),
# and on current tm that step replaces the corpus content with a single
# document object ("transformation drops documents").
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

# wordLengths replaces the obsolete minWordLength option; c(1, Inf) keeps
# terms of any length (the tm default would drop terms under 3 characters).
myDTM <- TermDocumentMatrix(
  jeopCorpus,
  control = list(wordLengths = c(1, Inf))
)
m <- as.matrix(myDTM)

# Total count of each term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)

# Fixed seed so the word layout is reproducible between runs.
set.seed(4363)

# NOTE: min.freq = 3 hides every term that occurs fewer than three times;
# lower it when working with a small sample such as the one above.
wordcloud(
  names(v), v,
  max.words = 100,
  min.freq = 3,
  scale = c(4, 0.1),
  random.order = FALSE,
  rot.per = .5,
  vfont = c("sans serif", "plain"),
  colors = palette()
)
我想为位置中包含“USA”的文本、位置中包含“UK”的文本,以及位置为法国(France)的文本分别生成一个单独的文字云,这可能吗?
答案 0 :(得分:0)
# Draw one word cloud per country, using the texts in DATA and the
# matching entries in Location (both defined earlier in the script).

# stringsAsFactors = FALSE keeps the text as character on R < 4.0 as well.
jeopQ <- data.frame(DATA, Location, stringsAsFactors = FALSE)

# Clean Location: drop everything up to and including the last comma so
# only the country remains ("Norwich,UK" -> "UK"). trimws() removes stray
# surrounding blanks such as the leading space in " France".
jeopQ$Location <- trimws(sub(".*,\\s*", "", jeopQ$Location))

# Loop over the distinct countries and plot a cloud for each.
for (loc in unique(jeopQ$Location)) {
  # Corpus restricted to the texts that belong to the current country.
  jeopCorpus <- Corpus(VectorSource(jeopQ$DATA[jeopQ$Location == loc]))

  # Text clean-up. The former tm_map(..., PlainTextDocument) step is removed
  # on purpose: tolower is already wrapped in content_transformer(), and on
  # current tm that step replaces the corpus content with a single document
  # object ("transformation drops documents").
  jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
  jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
  jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
  jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
  jeopCorpus <- tm_map(jeopCorpus, stemDocument)

  # wordLengths replaces the obsolete minWordLength option; c(1, Inf) keeps
  # terms of any length (the tm default would drop terms under 3 characters).
  myDTM <- TermDocumentMatrix(
    jeopCorpus,
    control = list(wordLengths = c(1, Inf))
  )
  m <- as.matrix(myDTM)

  # Total count of each term within this country, most frequent first.
  v <- sort(rowSums(m), decreasing = TRUE)

  # Re-seeding per iteration makes every cloud's layout reproducible on its
  # own, independent of how many countries were plotted before it.
  set.seed(4363)

  # NOTE: min.freq = 3 hides every term that occurs fewer than three times;
  # lower it when a country has only a handful of texts.
  wordcloud(
    names(v), v,
    max.words = 100,
    min.freq = 3,
    scale = c(4, 0.1),
    random.order = FALSE,
    rot.per = .5,
    vfont = c("sans serif", "plain"),
    colors = palette()
  )

  # Label the plot so successive clouds can be told apart.
  title(main = loc)
}