我有一个包含以下数据的数据集:
Ticket Feed back Date Month Rating
12345 The resolution was proper 01-01-2019 January 5
12346 The ticket was closed without notice 02-01-2019 January 3
12347 Good 03-01-2019 January 4
12354 He is highly reliable. 03-02-2019 February 4
12355 He accomplished all tasks 04-02-2019 February 4
我正在使用以下代码进行情感分析:
负面词词典 - https://gist.github.com/mkulakowski2/4289441
正面词词典 - https://gist.github.com/mkulakowski2/4289437 (我用这两个文件中从第36行开始的内容分别创建了2个txt文件)
library(stringr)
library(tm)
# Inspect the two word lists (expected to be loaded already).
str(positive) # positive words dictionary
str(negative) # negative words dictionary

# Normalise the feedback text: lower-case it, then strip English stop words,
# punctuation, surplus whitespace and digits, in that order.
feedback <- tolower(sample_reviews$`Feed back`)
cleaned <- removeWords(feedback, stopwords("english"))
cleaned <- removePunctuation(cleaned)
cleaned <- stripWhitespace(cleaned)
cleaned <- removeNumbers(cleaned)

# Tokenise on whitespace; keep the per-review list and a flat word vector.
words_by_review <- str_split(cleaned, pattern = "\\s+")
words <- unlist(words_by_review)

# Dictionary positions of every word (NA where the word is not listed);
# both vectors are printed for inspection.
positive_hits <- match(words, positive)
negative_hits <- match(words, negative)
positive_hits
negative_hits

# Share of positive vs. negative words among all matched words, in percent.
n_positive <- sum(!is.na(positive_hits))
n_negative <- sum(!is.na(negative_hits))
n_matched <- n_positive + n_negative
positive_pct <- (n_positive / n_matched) * 100
negative_pct <- (n_negative / n_matched) * 100

# Assemble the result table. cbind() with the character label column yields
# a character matrix, so `values` ends up stored as text.
pct_matrix <- matrix(
  c(positive_pct, negative_pct),
  ncol = 1,
  dimnames = list(c("Positive", "Negative"), "values")
)
labelled <- cbind("Result" = rownames(pct_matrix), pct_matrix)
mydata <- data.frame(labelled)
“mydata”变量中的数据框如下所示:
Result values
Positive "Positive" "57.785"
Negative "Negative" "48.214"
我需要针对每个月创建我的数据框。 如:
Month ValuePositive ValueNegative
January 34 66
February 50 50
使用当前的代码,我只能得到正面和负面词出现的总体百分比。我应该怎么做才能按月拆分这些百分比并绘制成图?也就是说,对于每个月,我都需要正面和负面情绪各自的百分比。
答案 0 :(得分:1)
这可能是您要创建的:
library(tidyverse)
library(reshape2)
library(tm)
# Sample data transcribed from the table in the question (the "Feed back"
# column is named `Feedback` here). The second rating is 3, as in that table.
x <- tibble(
  Ticket = c(12345, 12346, 12347, 12354, 12355),
  Feedback = c(
    "The resolution was proper",
    "The ticket was closed without notice",
    "Good",
    "He is highly reliable.",
    "He accomplished all tasks"
  ),
  Date = c("01-01-2019", "02-01-2019", "03-01-2019", "03-02-2019", "04-02-2019"),
  Month = c("January", "January", "January", "February", "February"),
  Rating = c(5, 3, 4, 4, 4)
)
# Read the two word lists (one word per line, no header row). With
# `col_names = FALSE` readr names the single column `X1`, which is pulled
# out as a plain vector. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
negative <- read_tsv("negative.txt", col_names = FALSE)$X1
positive <- read_tsv("positive.txt", col_names = FALSE)$X1
str(positive) # positive words dictionary
str(negative) # negative words dictionary
# Group the reviews by month: split() returns a named list holding one
# tibble per distinct value of `Month`.
sample_reviews <- split(x, x$Month)
# Run the dictionary-based sentiment count once per month.
#
# Input:  `sample_reviews`, a named list of per-month tables with a
#         `Feedback` text column; `positive` / `negative` word vectors.
# Output: a named list with one two-row data frame per month:
#         `Result` ("Positive"/"Negative") and `values` (numeric percentage).
x <- lapply(sample_reviews, function(month_reviews) {
  # Normalise the text: lower-case, then drop English stop words,
  # punctuation, surplus whitespace and digits.
  text <- tolower(month_reviews$Feedback)
  text <- removeWords(text, stopwords("english"))
  text <- removePunctuation(text)
  text <- stripWhitespace(text)
  text <- removeNumbers(text)

  # Flat vector of all words written in this month.
  words <- unlist(str_split(text, pattern = "\\s+"))

  n_positive <- sum(words %in% positive)
  n_negative <- sum(words %in% negative)
  n_matched <- n_positive + n_negative

  # With no dictionary word at all the ratio would be 0 / 0 (NaN);
  # report the month as missing instead.
  if (n_matched == 0) {
    percentages <- c(NA_real_, NA_real_)
  } else {
    percentages <- c(n_positive, n_negative) / n_matched * 100
  }

  # Build the data frame directly so that `values` stays numeric. Going
  # through cbind() with the character labels would turn the percentages
  # into text, and the bar chart would then treat them as categories.
  data.frame(
    Result = c("Positive", "Negative"),
    values = percentages,
    row.names = c("Positive", "Negative"),
    stringsAsFactors = FALSE
  )
})
# Tag every per-month result with the month it belongs to. The month is
# taken from the list names; the returned list itself is unnamed.
x <- lapply(names(x), function(month) {
  monthly <- x[[month]]
  monthly$Month <- month
  monthly
})
# Stack the per-month results into one table and order the months
# chronologically. split() orders its groups alphabetically, so the levels
# are set explicitly. Taking them from `month.name` (restricted to the
# months actually present) keeps calendar order for any month, not just
# January and February.
x <- x %>%
  bind_rows() %>%
  mutate(Month = factor(Month, levels = intersect(month.name, unique(Month))))
# Grouped bar chart: one bar per sentiment and month, months side by side.
# geom_col() draws the bar heights straight from `values`.
sentiment_plot <- ggplot(x, aes(x = Result, y = values, fill = Month)) +
  geom_col(position = "dodge")

# Show the combined plot.
sentiment_plot

# Same chart, split into one panel per month.
sentiment_plot + facet_wrap(~Month)
这是您想要生成的结果吗?