Question

我有一个类似于以下形式的数据集：

Age Food_1_1    Food_1_2    Food_1_3    Type_1_1    Type_1_2   Type_1_3
6      a             b         a           Pr          Vit       Min
6                    b         c                       Min       Vit
6      c             c         a           Pr          Pr        Vit
7      a             a         b           Pr          Pr        Vit
7      c                       a           Pr                    Vit
8      b             b         a           Vit         Vit       Pr

从这个数据集中，我想创建以下列：

axPr    axVit   axMin   bxPr    bxVit   bxMin   cxPr    cxVit   cxMin
1         0       1      0        1       0       0       0       0
0         0       0      0        0       1       0       1       0
0         1       0      0        0       0       2       0       0
2         0       0      0        1       0       0       0       0
0         1       0      0        0       0       1       0       0
1         0       0      0        2       0       0       0       0

这些数字代表每种食物（Food）与每种类型（Type）的组合在该行中出现的次数。所提供数据中的所有变量都是因子（factor）。

Answer 1

使用dplyr和tidyr：

library(dplyr)
library(tidyr)

# For every input row, count how often each Food/Type pair occurs and spread
# the counts into one column per pair, named "<Food>x<Type>".
df %>%
  # Tag each input row. The id is a factor so that every input row stays a
  # level: count(.drop = FALSE) below then also emits rows whose Food/Type
  # cells are all NA (as all-zero rows) instead of silently losing them.
  mutate(row = factor(row_number())) %>%
  # Get the data in long format: the part of each name before the first "_"
  # becomes the value column (Food / Type), the rest goes to `col`
  pivot_longer(cols = -c(Age, row),
               names_to = c('.value', 'col'), 
               names_pattern = '(.*?)_(.*)',
               values_drop_na = TRUE) %>%
  # Convert columns to factor so unused Food/Type combinations can be kept
  mutate(across(c(Food, Type), factor)) %>%
  # In old dplyr use mutate_at
  # mutate_at(vars(c(Food, Type)), factor) %>%
  # Count each occurrence of food and type; .drop = FALSE keeps zero counts
  count(row, Food, Type, .drop = FALSE) %>%
  # Use unite to combine Food and Type into the future column name
  unite(col, Food, Type, sep = 'x') %>%
  # Get data in wide format, one column per Food/Type pair
  pivot_wider(names_from = col, values_from = n, values_fill = list(n = 0)) %>%
  # The helper row id is no longer needed
  select(-row)


# A tibble: 6 x 9
#  axMin  axPr axVit bxMin  bxPr bxVit cxMin  cxPr cxVit
#  <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1     1     1     0     0     0     1     0     0     0
#2     0     0     0     1     0     0     0     0     1
#3     0     0     1     0     0     0     0     2     0
#4     0     2     0     0     0     1     0     0     0
#5     0     0     1     0     0     0     0     1     0
#6     0     1     0     0     0     2     0     0     0

数据

# Sample data: Age plus three Food/Type pairs per row; NA marks an empty cell.
df <- data.frame(
  Age      = c(6L, 6L, 6L, 7L, 7L, 8L),
  Food_1_1 = c("a", NA_character_, "c", "a", "c", "b"),
  Food_1_2 = c("b", "b", "c", "a", NA_character_, "b"),
  Food_1_3 = c("a", "c", "a", "b", "a", "a"),
  Type_1_1 = c("Pr", NA_character_, "Pr", "Pr", "Pr", "Vit"),
  Type_1_2 = c("Vit", "Min", "Pr", "Pr", NA_character_, "Vit"),
  Type_1_3 = c("Min", "Vit", "Vit", "Vit", "Vit", "Pr"),
  stringsAsFactors = FALSE
)

基于R中的现有列创建多个列

1 个答案: