使用col中的值从另一个col中选择值,在R中放入新的df

时间:2018-05-21 02:54:15

标签: r

我有这样的df

# Sample data: one row per person. `a_or_b` says which variant ("ah" or "bh")
# of each measurement applies to that person.
name <- c("Fred", "Mark", "Jen", "Simon", "Ed")
a_or_b <- c("a", "a", "b", "a", "b")

# "abc" measurements: an ah/bh pair for each of one, two, three.
abc_ah_one <- c(3, 5, 2, 4, 7)
abc_bh_one <- c(5, 4, 1, 9, 8)
abc_ah_two <- c(2, 1, 3, 7, 6)
abc_bh_two <- c(3, 6, 8, 8, 5)
abc_ah_three <- c(5, 4, 7, 6, 2)
abc_bh_three <- c(9, 7, 2, 1, 4)

# "def" measurements, same layout.
def_ah_one <- c(1, 3, 9, 2, 7)
def_bh_one <- c(2, 8, 4, 6, 1)
def_ah_two <- c(4, 7, 3, 2, 5)
def_bh_two <- c(5, 2, 9, 8, 3)
def_ah_three <- c(8, 5, 3, 5, 2)
def_bh_three <- c(2, 7, 4, 3, 0)

# Column names are taken from the variable names passed in.
df <- data.frame(
  name, a_or_b,
  abc_ah_one, abc_bh_one, abc_ah_two, abc_bh_two, abc_ah_three, abc_bh_three,
  def_ah_one, def_bh_one, def_ah_two, def_bh_two, def_ah_three, def_bh_three
)

我想使用“a_or_b”列中的值,为每个“abc”(one、two 和 three)从相应的“ah / bh”列中选取值,并将其放入一个新的数据框中。例如,Fred 的 a_or_b 为“a”,因此他在新 df 中的那一行将具有值 3、2 和 5,即各 abc 列中“ah”类别的值。Jen 的 a_or_b 列为“b”,因此她在新 df 中的那一行将取各 abc 列中的“bh”值。这是我想要的输出:

# Expected result: for each person, the abc value taken from the ah or bh
# column named by their a_or_b entry.
combo_one <- c(3, 5, 1, 4, 8)
combo_two <- c(2, 1, 8, 7, 5)
combo_three <- c(5, 4, 2, 6, 4)
df2 <- data.frame(
  name,
  a_or_b,
  combo_one,
  combo_two,
  combo_three
)

我尝试使用sapply。下面的代码返回一个矩阵,其中每一列对应df的一行,给出该行在df[grep("abc",colnames(df),fixed=TRUE)]中应选取的列的正确索引:

# For every row, locate (within the "abc" columns only) the positions of the
# columns whose name contains that row's "ah"/"bh" tag.
sapply(
  paste0(df$a_or_b, "h"),
  function(tag) {
    grep(tag, grep("abc", colnames(df), fixed = TRUE, value = TRUE))
  }
)

3 个答案:

答案 0 :(得分:1)

首先,我们将您的数据收集到整齐的长格式中,然后将列分解为有用的内容。之后过滤很简单,如果有必要,我们可以转换回困难的宽格式:

library(dplyr)
library(tidyr)

# Long format: one row per (person, measurement column). The column name is
# split into its three parts, the "ah"/"bh" part is cut down to its first
# letter, and only the abc rows matching the person's a_or_b are kept.
result_long <- df %>%
  gather(key = "var", value = "val", -name, -a_or_b) %>%
  separate(var, into = c("combo", "h", "ind"), sep = "_") %>%
  mutate(h = substr(h, 1, 1)) %>%
  filter(combo == "abc", h == a_or_b) %>%
  arrange(name)
result_long
#     name a_or_b combo h   ind val
# 1     Ed      b   abc b   one   8
# 2     Ed      b   abc b   two   5
# 3     Ed      b   abc b three   4
# 4   Fred      a   abc a   one   3
# 5   Fred      a   abc a   two   2
# 6   Fred      a   abc a three   5
# 7    Jen      b   abc b   one   1
# 8    Jen      b   abc b   two   8
# 9    Jen      b   abc b three   2
# 10  Mark      a   abc a   one   5
# 11  Mark      a   abc a   two   1
# 12  Mark      a   abc a three   4
# 13 Simon      a   abc a   one   4
# 14 Simon      a   abc a   two   7
# 15 Simon      a   abc a three   6

# Back to wide format: one column per index, listed as one, two, three.
result_long %>%
  spread(key = ind, value = val) %>%
  select(name, a_or_b, one, two, three)
#    name a_or_b one two three
# 1    Ed      b   8   5     4
# 2  Fred      a   3   2     5
# 3   Jen      b   1   8     2
# 4  Mark      a   5   1     4
# 5 Simon      a   4   7     6

答案 1 :(得分:1)

Base R 的做法是使用 lapply:遍历数据框的每一行,根据 a_or_b 列用 paste0 拼出一个字符串来找到对应的列,然后用 rbind 把每一行取出的值合并起来。

# Build one single-row data frame per input row, keeping only the "abc"
# columns whose name carries that row's a/b tag, then stack the rows.
# seq_len() is used instead of seq() so that a zero-row df gives an empty
# loop (and a NULL result) rather than iterating over c(1, 0).
new_df <- do.call(
  "rbind",
  lapply(seq_len(nrow(df)), function(x) {
    wanted <- grepl(paste0("abc_", df[x, "a_or_b"], "h"), colnames(df))
    setNames(df[x, wanted], c("combo_one", "combo_two", "combo_three"))
  })
)
new_df
#   combo_one combo_two combo_three
# 1         3         2           5
# 2         5         1           4
# 3         1         8           2
# 4         4         7           6
# 5         8         5           4

# The required columns can then be attached with cbind().
# NOTE(review): the original cbind snippet is missing from this page; the call
# below is the presumed form (identifier columns + new_df) -- confirm.
cbind(df[c("name", "a_or_b")], new_df)

答案 2 :(得分:0)

结合使用 purrr 的 map 系列函数和 mutate 可以做到这一点:

# library() fails loudly if the package is missing; require() only returns
# FALSE and lets the script continue.
library(tidyverse)

# Element-wise choice between two parallel columns: gives ah[i] where
# choice[i] is "a", bh[i] where it is "b", and NA for any other non-missing
# value.
pick_ah_or_bh <- function(choice, ah, bh) {
  pmap_dbl(
    list(as.character(choice), ah, bh),
    function(key, ah_val, bh_val) {
      switch(key, a = ah_val, b = bh_val, NA_real_)
    }
  )
}

# The combo_* values are read from df's own abc_* columns. Writing
# `combo_one = !!combo_one` would instead inject whatever variable named
# combo_one exists in the calling environment, and fail if there is none.
df %>%
  select(name, a_or_b, starts_with("abc")) %>%
  mutate(
    combo_one = pick_ah_or_bh(a_or_b, abc_ah_one, abc_bh_one),
    combo_two = pick_ah_or_bh(a_or_b, abc_ah_two, abc_bh_two),
    combo_three = pick_ah_or_bh(a_or_b, abc_ah_three, abc_bh_three)
  ) %>%
  select(name, a_or_b, starts_with("combo"))

输出:

   name a_or_b combo_one combo_two combo_three
1  Fred      a         3         2           5
2  Mark      a         5         1           4
3   Jen      b         1         8           2
4 Simon      a         4         7           6
5    Ed      b         8         5           4