> head(Wcloud.Data)
# A tibble: 6 x 3
  document term       count
  <chr>    <chr>      <dbl>
1 1        accept         1
2 1        access         1
3 1        accomplish     1
4 1        account        4
5 1        accur          2
6 1        achiev         1

我有 33,647,383 条观测,因此这是一个非常大的数据框。如果我使用 max() 函数,会得到一个非常大的数字(64116),但我的数据框中没有任何单词的频率是 64116。此外,如果我在 Shiny 中用 wordcloud() 绘制这个数据框,同一个单词会被绘制多次。另外,当我想对 count 列进行排序时,sort(Wcloud.Data$count, decreasing = TRUE) 也不起作用。所以一定有什么地方不对,但我不知道问题出在哪里,也不知道该如何解决。有人有什么想法吗?


> observations.tf
<<DocumentTermMatrix (documents: 76717, terms: 4234)>>
Non-/sparse entries: 33647383/291172395
Sparsity           : 90%
Maximal term length: 15
Weighting          : term frequency (tf)




# Minimal stand-in for the question's data: six stemmed terms, all belonging
# to document 1, together with their per-document counts.
Wcloud.Data <- data.frame(
  Document = rep(1, 6),
  term = c(
    "accept", "access", "accomplish",
    "account", "accur", "achiev"
  ),
  count = c(1, 1, 1, 4, 2, 1)
)

# Collapse the long (document, term, count) table to one row per term by
# summing the counts over all documents, so that each word is passed to
# wordcloud() exactly once with its total frequency.
Data <- Wcloud.Data %>%
  group_by(term) %>%
  summarise(Frequency = sum(count))

# Draw the cloud from the aggregated frequencies.
wordcloud(
  words = Data$term,
  freq = Data$Frequency,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Toy corpus: every element of `text` is one document.
# tibble() replaces the deprecated data_frame() constructor.
Data <- tibble::tibble(
  text = c(
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    # NOTE(review): the original vector was cut off after 18 entries and never
    # closed, but the matrix printed below has 19 documents and its text19 row
    # contains only "japan". This entry is reconstructed from that output --
    # confirm against the original data.
    "Japan"
  )
)

# Tokenise explicitly before building the document-feature matrix: calling
# dfm() directly on a character vector is deprecated in quanteda >= 3, while
# dfm(tokens(x)) works in both old and new versions.
DocTerm <- quanteda::dfm(quanteda::tokens(Data$text))
# Document-feature matrix of: 19 documents, 11 features (78.5% sparse).
# 19 x 11 sparse Matrix of class "dfm"
# features
# docs     chinese beijing shanghai this is china here hello kyoto japan tokyo
# text1        2       1        0    0  0     0    0     0     0     0     0
# text2        2       0        1    0  0     0    0     0     0     0     0
# text3        0       0        0    1  1     1    0     0     0     0     0
# text4        0       0        0    0  1     1    1     0     0     0     0
# text5        0       0        0    0  0     1    0     1     0     0     0
# text6        2       1        0    0  0     0    0     0     0     0     0
# text7        2       0        1    0  0     0    0     0     0     0     0
# text8        0       0        0    1  1     1    0     0     0     0     0
# text9        0       0        0    0  1     1    1     0     0     0     0
# text10       0       0        0    0  0     1    0     1     0     0     0
# text11       0       0        0    0  0     0    0     0     1     1     0
# text12       1       0        0    0  0     0    0     0     0     1     1
# text13       0       0        0    0  0     0    0     0     1     1     0
# text14       1       0        0    0  0     0    0     0     0     1     1
# text15       0       0        0    0  0     0    0     0     1     1     0
# text16       1       0        0    0  0     0    0     0     0     1     1
# text17       0       0        0    0  0     0    0     0     1     1     0
# text18       1       0        0    0  0     0    0     0     0     1     1
# text19       0       0        0    0  0     0    0     0     0     1     0

# Convert the dfm to a data frame and drop its first column, which holds the
# document ids rather than counts. Index with -1 instead of 2:ncol(DocTerm):
# the data frame has one more column than the dfm, so 2:ncol(DocTerm) would
# silently discard the last term ("tokyo").
Mat <- quanteda::convert(DocTerm, to = "data.frame")[, -1, drop = FALSE]
Result <- colSums(Mat) # total frequency of each term over all documents
# > Result
# (column sums of the 19 x 11 matrix printed above)
# chinese  beijing shanghai     this       is    china     here    hello    kyoto    japan    tokyo
#      12        2        2        2        4        6        2        2        4        9        4