Question

我有一个带有几列的数据框，这些列代表某个方程式中的系数。我想评估这个方程并将其添加到数据帧中（例如逐行计算）。但是要评估方程式，我需要所有系数作为行向量。这是一个例子：

# Example data: c1..c3 are the coefficients and x1..x3 the variables of the
# equation; id and name identify each row.
d <- data.frame(
  id = 1:2,
  name = c("a", "b"),
  c1 = 3:4, c2 = 5:6, c3 = 2:3,
  x1 = 1:2, x2 = 7:8, x3 = 3:2
)

我需要计算c1 * x1 + c2 * x2 + c3 * x3，但是手动输入完整的方程式是不切实际的：在实际情况下，系数有数十个。理想情况下，我想选择这些列并将它们视为行向量，如下所示：

# not real code, just wishful thinking
d %>% mutate(result = sum((select(starts_with(c)) %>% as.vector)*
                           select(starts_with(x)) %>% as.vector)
                      )
             )

这当然不能那样工作，但是我想知道，在像mutate这样逐行计算时，是否有办法将一组列作为行向量来处理。

谢谢。

Answer 1

一个想法是如下gather数据帧。结果在CX列中。

library(tidyverse)

# Reshape to long form, pair every coefficient with the variable that has the
# same numeric suffix, then total the products for each name.
d2 <- d %>%
  # One row per (id, name, original column); Type holds the column name.
  gather(key = Type, value = Value, -id, -name) %>%
  # Split after the first character: "c1" -> Letter "c", Number "1".
  separate(col = Type, into = c("Letter", "Number"), sep = 1) %>%
  # One column per letter, so c and x with the same Number share a row.
  spread(key = Letter, value = Value) %>%
  mutate(CX = c * x) %>%
  group_by(name) %>%
  summarise(CX = sum(CX))
d2
# # A tibble: 2 x 2
#   name     CX
#   <fct> <int>
# 1 a        44
# 2 b        62

这是dplyr的选项。但是，如果要使用此解决方案，则必须确保列的顺序正确。

# Coefficient columns and variable columns, selected by name prefix.
dc <- select(d, starts_with("c"))
dx <- select(d, starts_with("x"))
# Element-wise product; columns are paired by position, not by name.
d3 <- dc * dx
# Attach the identifying columns and the row-wise total of the products.
d4 <- d %>%
  select(id, name) %>%
  bind_cols(d3) %>%
  mutate(CX = rowSums(d3))
d4
#   id name c1 c2 c3 CX
# 1  1    a  3 35  6 44
# 2  2    b  8 48  6 62

这是上述代码的 base R 版本。

# Base R version: multiply the coefficient columns by the variable columns and
# add the row-wise total as CX.
# drop = FALSE keeps each selection a data frame even when only one column
# matches; without it `[` returns a bare vector and the `$<-` assignment and
# rowSums() below fail.
dc <- d[, grepl("^c", names(d)), drop = FALSE]
dx <- d[, grepl("^x", names(d)), drop = FALSE]
# Element-wise product; columns are paired by position, so the c* and x*
# columns must appear in the same order in d.
d3 <- dc * dx
d3$CX <- rowSums(d3)
d4 <- cbind(d[, c("id", "name")], d3)
d4
#   id name c1 c2 c3 CX
# 1  1    a  3 35  6 44
# 2  2    b  8 48  6 62

Answer 2

使用nest和map，您可以执行以下操作：

library(tidyverse)

# Nest each id's row into a list-column, then compute the sum of
# coefficient * variable products for every nested data frame.
d %>%
  group_by(id) %>%
  nest() %>%
  mutate(
    cx = map_dbl(data, function(row_df) {
      coefs <- select(row_df, starts_with("c"))
      vars <- select(row_df, starts_with("x"))
      # Columns are paired by position (c1 with x1, c2 with x2, ...).
      sum(coefs * vars)
    })
  )

# A tibble: 2 x 3
#      id data                cx
#   <int> <list>           <dbl>
# 1     1 <tibble [1 x 7]>    44
# 2     2 <tibble [1 x 7]>    62

或者，您可以使用do以便将数据框用作输入：

# do() hands each id group to the expression as `.`, so the whole group data
# frame is available for column selection.
d %>%
  group_by(id) %>%
  do({
    coefs <- select(., starts_with("c"))
    vars <- select(., starts_with("x"))
    # Columns are paired by position (c1 with x1, c2 with x2, ...).
    mutate(., cx = sum(coefs * vars))
  })

# A tibble: 2 x 9
# Groups:   id [2]
#      id name     c1    c2    c3    x1    x2    x3    cx
#   <int> <fct> <int> <int> <int> <int> <int> <int> <int>
# 1     1 a         3     5     2     1     7     3    44
# 2     2 b         4     6     3     2     8     2    62

要确保采用正确的乘积（即x1 * c1而不是x1 * c2），您可以先提取x和c都具有的最大列编号，然后用它按编号选取对应的列：

# Numeric suffix of every coefficient/variable column, e.g. c1 -> 1, x3 -> 3.
col_numbers <- d %>%
  select(matches("^(x|c)")) %>%
  names() %>%
  parse_number()
# A suffix that occurs more than once is present for both "c" and "x"; take the
# largest such suffix. max() is required: which.max() returns the position of
# the maximum inside the subset, which equals the suffix only when the columns
# happen to be stored in ascending order.
max_col_number <- max(col_numbers[duplicated(col_numbers)])

# Select c1..cN and x1..xN explicitly by name so that each coefficient is
# multiplied by the variable with the same suffix, whatever the column order.
d %>%
  group_by(id) %>%
  do(mutate(., cx = sum(
    select(., str_c("c", seq_len(max_col_number))) *
      select(., str_c("x", seq_len(max_col_number)))
  )))

或者您可以先排列列，然后使用上面的方法：

# Put id and name first, then all columns in alphabetical order, so the c* and
# x* columns line up by suffix. sort(names(d)) replaces the deprecated
# dplyr::current_vars(); id and name are already selected and keep their
# leading position.
d <- select(d, id, name, sort(names(d)))

# With the columns sorted, positional pairing of the c* and x* columns is safe.
d %>%
  group_by(id) %>%
  do(mutate(., cx = sum(
    select(., starts_with("c")) * select(., starts_with("x"))
  )))

在mutate（dplyr）中-将几列视为行向量

2 个答案: