Question

我有一个调查表数据，如下所示：

   items no_stars1  no_stars2   no_stars3   average satisfied   bad
1     A         1           0           0         0         0     1
2     B         0           1           0         1         0     0
3     C         0           0           1         0         1     0
4     D         0           1           0         0         1     0
5     E         0           0           1         1         0     0
6     F         0           0           1         0         1     0
7     G         1           0           0         0         0     1

基本上，标题列（no_stars 星级和 satisfactory 满意度）表示每个项目的有序等级。我想将 no_stars（第 2:4 列）和 satisfactory（第 5:7 列）各自归纳为一列，以便输出看起来像这样：

   items    no_stars    satisfactory    
1     A         1           1           
2     B         2           2           
3     C         3           3           
4     D         2           3           
5     E         3           2           
6     F         3           3           
7     G         1           1

$no_stars：1 代表 no_stars1，2 代表 no_stars2，3 代表 no_stars3

$satisfactory：1 表示 bad（不好），2 表示 average（一般），3 表示 satisfied（满意）

我尝试了下面的代码

# Collapse each group of 0/1 indicator columns into one ordinal code by
# weighting every indicator with its rank and summing. Exactly one indicator
# per group is expected to be 1 in each row.
# Unlike recoding the columns in place, this leaves `df` untouched, so the
# indicator columns can still be tested with `== 1` afterwards.
no_stars <- df$no_stars1 + 2 * df$no_stars2 + 3 * df$no_stars3
satisfactory <- df$bad + 2 * df$average + 3 * df$satisfied

# The column is named `items` (lower case); `df$Items` would be NULL and the
# item labels would silently be dropped from the result.
tidy_df <- data.frame(items = df$items, no_stars, satisfactory)
tidy_df

R 中是否有可以执行相同操作的函数？或者有没有人有更好、更简单的解决方案？

谢谢

Answer 1

只需使用 max.col，并按所需的顺序指定各列：

# The position of a column in these vectors is the code it maps to:
# first column -> 1, second -> 2, third -> 3.
starsOrder <- c("no_stars1", "no_stars2", "no_stars3")
satOrder <- c("bad", "average", "satisfied")

# max.col() gives, for every row, the position of the largest value, i.e. the
# column that holds the 1. ties.method = "first" keeps the result
# deterministic; the default ("random") picks an arbitrary column whenever a
# row contains ties, for example a row of all zeros.
data.frame(
  items = df$items,
  no_stars = max.col(df[, starsOrder], ties.method = "first"),
  satisfactory = max.col(df[, satOrder], ties.method = "first")
)
#  items no_stars satisfactory
#1     A        1            1
#2     B        2            2
#3     C        3            3
#4     D        2            3
#5     E        3            2
#6     F        3            3
#7     G        1            1

Answer 2

另一种 tidyverse 解决方案：利用 factor 到 integer 的转换来对 no_stars 和 satisfactory 进行编码，并进行两次从宽格式到长格式的转换：

library(tidyverse)
# Reshape both indicator groups from wide to long, keep the combination that
# is switched on for each item, and encode both variables as integers.
# pivot_longer() replaces the superseded gather().
df %>%
  pivot_longer(
    starts_with("no_stars"),
    names_to = "no_stars",
    values_to = "v1"
  ) %>%
  # Default (alphabetical) levels map no_stars1 -> 1, no_stars2 -> 2, ...
  mutate(no_stars = as.integer(factor(no_stars))) %>%
  pivot_longer(
    c(average, satisfied, bad),
    names_to = "satisfactory",
    values_to = "v2"
  ) %>%
  # Keep only the rows where both the star and the rating indicator are set.
  filter(v1 > 0, v2 > 0) %>%
  mutate(
    # Explicit levels fix the order bad < average < satisfied.
    satisfactory = as.integer(
      factor(satisfactory, levels = c("bad", "average", "satisfied"))
    )
  ) %>%
  select(-v1, -v2) %>%
  arrange(items) %>%
  # pivot_longer() returns a tibble; convert back to a plain data frame.
  as.data.frame()
#  items no_stars satisfactory
#1     A        1            1
#2     B        2            2
#3     C        3            3
#4     D        2            3
#5     E        3            2
#6     F        3            3
#7     G        1            1

Answer 3

虽然可能会有更优雅的解决方案，但使用dplyr::case_when()可让您灵活地编写所需的代码：

library(dplyr)

# Add both ordinal codes in a single mutate() call; the original indicator
# columns are kept next to the two new columns.
dplyr::mutate(
  df,
  no_stars = dplyr::case_when(
    no_stars1 == 1 ~ 1,
    no_stars2 == 1 ~ 2,
    no_stars3 == 1 ~ 3
  ),
  satisfactory = dplyr::case_when(
    average == 1 ~ 2,
    satisfied == 1 ~ 3,
    bad == 1 ~ 1
  )
)
# items no_stars1 no_stars2 no_stars3 average satisfied bad no_stars satisfactory
# 1     A         1         0         0       0         0   1        1            1
# 2     B         0         1         0       1         0   0        2            2
# 3     C         0         0         1       0         1   0        3            3
# 4     D         0         1         0       0         1   0        2            3
# 5     E         0         0         1       1         0   0        3            2
# 6     F         0         0         1       0         1   0        3            3
# 7     G         1         0         0       0         0   1        1            1

Answer 4

# Idea: overwrite every 1 with the name of its column, blank out the zeros,
# then coalesce() each indicator group to obtain the name of the active column
# and turn that name into its rank.
dat %>%
  # Indicator cells are about to hold column names, so make them character
  # first; replace_na() casts the replacement to the column type and would
  # reject a character replacement for a numeric column.
  mutate(across(-items, as.character)) %>%
  replace(. == "1", NA) %>%
  replace_na(setNames(as.list(names(.)), names(.))) %>%
  replace(. == "0", NA) %>%
  mutate(
    s = coalesce(!!!.[2:4]),
    # Explicit levels: the code must not depend on which value happens to
    # appear first in the data, as it would with unique(s).
    no_stars = as.numeric(
      factor(s, levels = c("no_stars1", "no_stars2", "no_stars3"))
    ),
    t = coalesce(!!!.[5:7]),
    satisfactory = as.numeric(
      factor(t, levels = c("bad", "average", "satisfied"))
    )
  ) %>%
  select(items, no_stars, satisfactory)

  items no_stars satisfactory
1     A        1            1
2     B        2            2
3     C        3            3
4     D        2            3
5     E        3            2
6     F        3            3
7     G        1            1

Answer 5

使用apply和match：

# For each row, match() returns the position of the first 1 within the
# selected columns. The satisfaction columns are picked in the order
# bad (7), average (5), satisfied (6) so that the position equals the rank.
data.frame(
  items = df1[["items"]],
  no_stars = apply(df1[, 2:4], 1, function(row) match(1, row)),
  satisfactory = apply(df1[, c(7, 5, 6)], 1, function(row) match(1, row))
)

#   items no_stars satisfactory
# 1     A        1            1
# 2     B        2            2
# 3     C        3            3
# 4     D        2            3
# 5     E        3            2
# 6     F        3            3
# 7     G        1            1

数据

# Example survey data: one row per item, with 0/1 indicator columns for the
# star rating (no_stars1..no_stars3) and for the satisfaction level
# (average, satisfied, bad).
df1 <- read.table(
  text = "
   items no_stars1  no_stars2   no_stars3   average satisfied   bad
1     A         1           0           0         0         0     1
2     B         0           1           0         1         0     0
3     C         0           0           1         0         1     0
4     D         0           1           0         0         1     0
5     E         0           0           1         1         0     0
6     F         0           0           1         0         1     0
7     G         1           0           0         0         0     1",
  header = TRUE,
  stringsAsFactors = FALSE
)

R：数据框的列标题表示有序等级，且每个观测值的取值分散在多列中

5 个答案: