R:如何对每月数据使用lapply

时间:2019-05-07 06:24:58

标签: r dataframe lapply

我有一个包含以下数据的数据集:

Ticket      Feed back                            Date        Month             Rating
12345   The resolution was proper             01-01-2019    January              5
12346   The ticket was closed without notice  02-01-2019    January              3
12347   Good                                  03-01-2019    January              4
12354   He is highly reliable.                03-02-2019    February             4
12355   He accomplished all tasks             04-02-2019    February             4

我正在使用以下代码进行情感分析:

负面词词典:https://gist.github.com/mkulakowski2/4289441

正面词词典:https://gist.github.com/mkulakowski2/4289437 (两个词典均从第36行开始截取,分别保存为2个txt文件)

library(stringr)
library(tm)
# Inspect the word lists. `positive` and `negative` are not created in this
# snippet; presumably they were read from the two dictionary files beforehand.
str(positive)  # positive words dictionary
str(negative)  # negative words dictionary

# Clean the feedback text of all reviews at once (no split by month):
# lower-case, then drop English stop words, punctuation, repeated whitespace
# and digits.
file <- sample_reviews$`Feed back`
file <- tolower(file)
filee <- removeWords(file,stopwords("english"))
filee <- removePunctuation(filee)
filee <- stripWhitespace(filee)
filee <- removeNumbers(filee)
# Per-review word lists; `filr` is not used again in this snippet.
filr <- str_split(filee,pattern="\\s+")
# All words of all reviews as one flat character vector.
fg <- unlist(str_split(filee,pattern="\\s+"))
# Run at top level, these two calls only print the dictionary position of each
# word (NA = word not in that dictionary); the results are not stored.
match(fg,positive)
match(fg,negative) 
# a / b: number of words found in the positive / negative dictionary.
a<-sum(!is.na(match(fg,positive)))
b<-sum(!is.na(match(fg,negative)))
# NOTE(review): `c` now also names a variable; the c(...) calls below still
# reach base::c because R skips non-function objects when looking up a call.
c<- a+b
# Share of each sentiment among all dictionary hits, in percent.
# NOTE(review): if no word matches either dictionary, c is 0 and both become NaN.
Positiveperc <- (a/c)*100
Negativeperc <- (b/c)*100
# 2 x 1 matrix of the percentages with labelled rows.
mat<-matrix(c(Positiveperc,Negativeperc),ncol=1)
colnames(mat) <- c('values')
rownames(mat) <- c('Positive','Negative')
# NOTE(review): cbind() of the character labels with the numeric matrix gives a
# character matrix, so `values` ends up as text rather than numbers.
dat<- cbind("Result"=rownames(mat), mat)
mydata<-data.frame(dat)  

“mydata”变量中的数据框如下所示:

            Result       values
Positive   "Positive"   "57.785" 
Negative   "Negative"   "48.214"

我需要按月份分别生成这样的数据框,例如:

 Month     ValuePositive   Valuenegative
 January        34              66
 February       50              50

目前的输出只能得到正面和负面单词出现的总体百分比。我应该怎么做,才能把它拆分成按月的百分比并绘制成图?也就是说,对于每个月,我都需要正面和负面情绪各自所占的百分比。

1 个答案:

答案 0 :(得分:1)

这可能是您要创建的:

library(tidyverse)
library(reshape2)
library(tm)

# your data (sample rows reproduced from the question)
# Rating of ticket 12346 is 3, matching the table shown in the question.
x <- tibble(
  Ticket = c(12345, 12346, 12347, 12354, 12355),
  Feedback = c(
    "The resolution was proper",
    "The ticket was closed without notice",
    "Good",
    "He is highly reliable.",
    "He accomplished all tasks"
  ),
  Date = c("01-01-2019", "02-01-2019", "03-01-2019", "03-02-2019", "04-02-2019"),
  Month = c("January", "January", "January", "February", "February"),
  Rating = c(5, 3, 4, 4, 4)
)

# reading lists
# Each file is read without a header row and its first column (X1) is kept as
# a vector; the files are expected to hold one dictionary word per line.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
negative <- read_tsv("negative.txt", col_names = FALSE)$X1
positive <- read_tsv("positive.txt", col_names = FALSE)$X1
str(positive)  # positive words dictionary
str(negative)  # negative words dictionary


# List-Conversion: one list element per month, named after the month
sample_reviews <- split(x, x$Month)

# Your code executed for each month
# For every month's reviews: clean the feedback text, split it into words,
# count the words found in the `positive` / `negative` dictionaries and return
# both counts as percentages of all dictionary hits.
# Each list element is a data frame with the columns `Result` (label) and
# `values` (numeric percentage).
x <- lapply(sample_reviews, function(reviews) {
  # Same cleaning steps, in the same order, as in the question
  feedback <- tolower(reviews$Feedback)
  feedback <- removeWords(feedback, stopwords("english"))
  feedback <- removePunctuation(feedback)
  feedback <- stripWhitespace(feedback)
  feedback <- removeNumbers(feedback)

  # One flat vector with the words of all reviews of this month
  words <- unlist(str_split(feedback, pattern = "\\s+"))

  n_positive <- sum(words %in% positive)
  n_negative <- sum(words %in% negative)
  n_hits <- n_positive + n_negative

  # Without any dictionary hit the percentages are undefined: report NA
  # explicitly instead of the NaN that 0 / 0 would produce
  if (n_hits > 0) {
    percentages <- c(n_positive, n_negative) / n_hits * 100
  } else {
    percentages <- c(NA_real_, NA_real_)
  }

  # Build the data frame directly so that `values` stays numeric. Combining
  # the labels and the numbers with cbind() first would turn the percentages
  # into text, and the plot would then treat them as categories.
  labels <- c("Positive", "Negative")
  data.frame(
    Result = labels,
    values = percentages,
    row.names = labels,
    stringsAsFactors = FALSE
  )
})

# Add month as column: pair every monthly result with its list name and store
# that name in a new `Month` column; the list names themselves are dropped
x <- unname(Map(function(result, month) {
  result$Month <- month
  result
}, x, names(x)))

# transformation for plotting: stack the monthly results into one data frame
# and turn Month into a factor so legend and facets follow a fixed order.
# The levels are derived from the data instead of being hard-coded: English
# month names that occur are put in calendar order, and any other label is
# kept (after them) rather than being turned into NA.
x <- x %>%
  bind_rows() %>%
  mutate(
    Month = factor(Month, levels = union(intersect(month.name, Month), Month))
  )

# Grouped bar chart: one bar per sentiment and month, all in a single panel
sentiment_plot <- ggplot(x, aes(x = Result, y = values, fill = Month)) +
  geom_col(position = "dodge")

# Display the combined chart
sentiment_plot

# Same chart with facet_wrap(), which draws a separate panel for each month
sentiment_plot + facet_wrap(~Month)

这是使用第一个选项时得到的: enter image description here

这就是您添加facet_wrap()的结果: enter image description here

这是您想要得到的结果吗?