比较R中的多个类别变量

时间:2018-11-24 02:56:03

标签: r ggplot2 categorical-data

我想把下面两张条形图合并成一张大图。也就是说,我希望“黑人的州索赔”(来自图 a)紧挨着“黑人的民权索赔”(来自图 b),并对所有种族都这样处理,最终合并为一张图表。

由于某些类别(例如亚裔)的计数非常低,是否有更合适的方法来按种族比较州索赔与民权索赔的提出情况?

# a) State claim: count the rows of `jail` for every combination of Race and
# State_Claim_Made, then draw one bar per race stacked by the claim value.
race_claim <- setNames(
  data.frame(table(jail$Race, jail$State_Claim_Made)),
  c("Race", "Claim", "Count")
)

ggplot(
  data = race_claim,
  mapping = aes(x = Race, y = Count, fill = Claim)
) +
  geom_bar(stat = "identity")

# b) Civil rights claim: same cross-tabulation and plot as for the state
# claim, using the non-statutory column instead.
# NOTE(review): the sample data names this column `Non-Statutory_Case_Filed`;
# `jail$Non_Statutory` may not resolve on a tibble -- confirm the column name.
race_claim_civ <- setNames(
  data.frame(table(jail$Race, jail$Non_Statutory)),
  c("Race", "Claim", "Count")
)

ggplot(
  data = race_claim_civ,
  mapping = aes(x = Race, y = Count, fill = Claim)
) +
  geom_bar(stat = "identity")

数据示例:

# dput() output for a 20-row sample of the data; assign the result to `jail`
# to run the snippets above.
# As originally posted, the comma between the last two values of
# State_Claim_Made was missing (`1 0`), so the expression did not parse; it is
# restored here.
structure(list(Last_Name = c("Banks", "Beamon", "Dandridge", 
"Deakle, Jr.", "Doyle", "Drinkard", "Ellis", "Embry", "Gaines", 
"Gurley", "Hinton", "Holemon", "Holsomback", "Hunt", "Jones", 
"Mahan", "Mahan", "McMillian", "Moore", "Padgett"), First_Name = c("Medell", 
"Melvin Todd", "Beniah Alton", "Evan Lee", "Robert E.", "Gary", 
"Andre", "Anthony", "Freddie Lee", "Timothy", "Anthony", "Jeffrey", 
"John", "H. Guy", "Lydia Diane", "Dale", "Ronnie", "Walter", 
"Daniel Wade", "Larry Randal"), Age = c("27", "24", "29", "59", 
"44", "37", "35", "23", "22", "22", "29", "23", "33", "54", "40", 
"22", "26", "45", "24", "40"), Race = c("Black", "Asian", "Caucasian", 
"Caucasian", "Other", "Asian", "Black", "Black", "Black", 
"Caucasian", "Black", "Caucasian", "Caucasian", "Other", 
"Black", "Caucasian", "Asian", "Black", "Native American", "Caucasian"
), Sex = c("Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Male", "Male", "Male"), State = c("Alabama", 
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", 
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", 
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", 
"Alabama"), CIU = c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 
0, 0, 0, 0, 1, 0), Guilty_Plea = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IO = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Worst_Crime = c(6, 1, 
1, 4, 4, 1, 2, 1, 1, 6, 1, 2, 4, 6, 3, 2, 2, 1, 1, 1), Occurred = c(1999, 
1988, 1994, 2014, 1991, 1993, 2012, 1992, 1972, 1999, 1985, 1987, 
1987, 1987, 1997, 1983, 1983, 1986, 1999, 1990), Convicted = c(2001, 
1989, 1996, 2015, 1992, 1995, 2013, 1993, 1974, 2000, 1986, 1988, 
1988, 1993, 2000, 1986, 1986, 1988, 2002, 1992), Exonerated = c(2003, 
1990, 2015, 2015, 2001, 2001, 2014, 1997, 1991, 2002, 2015, 1999, 
2000, 1998, 2006, 1998, 1998, 1993, 2009, 1997), Sentence = c("15", 
"25", "Life", "Not sentenced", "20", "Death", "85", "20", "30", 
"35", "Death", "Life", "25", "Probation", "Life without parole", 
"35", "Life without parole", "Death", "Death", "Death"), Death_Penalty = c(0, 
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), DNA_Only = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0), FC = c(1, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MWID = c(0, 
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0), F_MFE = c(0, 
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), P_FA = c(1, 
1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0), OM = c(1, 
1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), ILD = c(0, 
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0), State_Statute = c("Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y"), State_Claim_Made = c(0, 0, 1, 
0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0), Zero_time = c(0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Prem = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pending = c(0, 
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Denied = c(0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), State_Award = c("0", 
"0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "0", "0", "0", 
"0", "0", "0", "0", "0", "0"), Amount = c("0", "0", NA, "0", 
"129041.88", "0", "0", "0", "1000000", "0", NA, "0", "0", "0", 
"0", "0", "0", "0", "0", "0"), `Non-Statutory_Case_Filed` = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0), No_Time = c(0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Unfiled = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1), Dismissed = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), Pending__1 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Award = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), Premature = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Amount__1 = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "$ undisclosed", "0", "0"), Years_Lost = c(1.7, 
0.1, 19.5, 0, 2.6, 5.7, 1.8, 4, 10.7, 1.5, 28.5, 10.6, 10.1, 
0, 5.8, 11.4, 11.4, 4.5, 5.4, 5.5), State_Award2 = c("0", "0", 
"0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0")), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

1 个答案:

答案 0 :(得分:1)

我认为这两个要求之间存在冲突:既要让条形图堆叠(stack),又要让条形并列(dodge)。我的解决方案可能不是最好的,也许有人能做得更好,但这是我目前得到的结果:

预处理

library(tidyverse)

# Reshape `jail` into one row per (race, action, claim) combination with its
# row count `n`, and give every race/action pair a numeric bar position `x`.
dat <- jail %>%
  rename_with(tolower) %>%
  select(race, state_claim_made, non_statutory_case_filed) %>%
  # Long format: the two claim columns become `action` (which claim) and
  # `claim` (its value). pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = c(state_claim_made, non_statutory_case_filed),
    names_to = "action",
    values_to = "claim"
  ) %>%
  count(race, action, claim) %>%
  mutate(
    action = ifelse(action == "state_claim_made", "state", "civil"),
    # Number the race/action pairs in the order they appear in the counted
    # table; seq_len() stays valid even when the table has zero rows.
    x = as.numeric(reorder(interaction(race, action), seq_len(n())))
  )
输出:
# # A tibble: 15 x 5
#    race            action claim     n     x
# <chr>           <chr>  <dbl> <int> <dbl>
#  1 Asian           civil      0     3     1
#  2 Asian           state      0     2     2
#  3 Asian           state      1     1     2
#  4 Black           civil      0     6     3
#  5 Black           civil      1     1     3
#  6 Black           state      0     3     4
#  7 Black           state      1     4     4
#  8 Caucasian       civil      0     7     5
#  9 Caucasian       state      0     6     6
# 10 Caucasian       state      1     1     6
# 11 Native American civil      1     1     7
# 12 Native American state      1     1     8
# 13 Other           civil      0     2     9
# 14 Other           state      0     1    10
# 15 Other           state      1     1    10  

对x轴标签的一些必要调整:

改编自this answer

# Axis ticks: one at every bar position in `dat$x`, plus one half a unit past
# the first bar and then every `bars_per_race` units after that.
# Assumes every race has one bar per action, so the extra ticks fall midway
# between the bars of a race -- TODO confirm for other data.
breaks <- local({
  bar_positions <- unique(dat$x)
  bars_per_race <- length(unique(dat$action))
  midpoints <- seq(
    from = min(dat$x) + .5,
    to = max(dat$x) + .5,
    by = bars_per_race
  )
  sort(c(bar_positions, midpoints))
})

# Tick labels, three per race in tick order: "civil", the race name pushed
# onto a second line, then "state".
labels <- unlist(
  lapply(
    unique(dat$race),
    function(race) c("civil", paste0("\n", race), "state")
  )
)

绘图数据

# Bar chart of the counts in `dat`: bars sit at the numeric positions `x`,
# are filled by the claim value, and the x axis is relabelled with the
# `breaks` / `labels` vectors built above the plot.
ggplot(dat, aes(x = x, y = n, fill = factor(claim))) +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  geom_col(show.legend = TRUE) +
  ggthemes::theme_few() +
  scale_fill_manual(
    name = NULL,
    values = c("gray75", "gray25"),
    breaks = c("0", "1"),
    labels = c("false", "true")
  ) +
  scale_x_continuous(breaks = breaks, labels = labels) +
  theme(axis.title.x = element_blank(), axis.ticks.x = element_blank()) +
  labs(title = "Jail Plot", y = "Count")

jailplot

数据

您附加的数据已损坏——表中某处缺少一个逗号或 $(我不记得具体是哪个了)。下面是相同的数据,但去掉了解决这个问题不需要的变量。

# Trimmed 20-row sample holding only the three columns used by the answer's
# code; assign the result to `jail` before running the preprocessing step.
structure(
  list(
    Race = c(
      "Black", "Asian", "Caucasian", "Caucasian", "Other",
      "Asian", "Black", "Black", "Black", "Caucasian",
      "Black", "Caucasian", "Caucasian", "Other", "Black",
      "Caucasian", "Asian", "Black", "Native American", "Caucasian"
    ),
    State_Claim_Made = c(
      0, 0, 1, 0, 1,
      0, 1, 0, 1, 0,
      1, 0, 0, 0, 1,
      0, 1, 0, 1, 0
    ),
    # Seventeen zeros, then 1, 1, 0: only rows 18 and 19 are set.
    Non_Statutory_Case_Filed = c(rep(0, 17), 1, 1, 0)
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)