我有一个类似于以下形式的数据集:
Age Food_1_1 Food_1_2 Food_1_3 Type_1_1 Type_1_2 Type_1_3
6   a        b        a        Pr       Vit      Min
6   NA       b        c        NA       Min      Vit
6   c        c        a        Pr       Pr       Vit
7   a        a        b        Pr       Pr       Vit
7   c        NA       a        Pr       NA       Vit
8   b        b        a        Vit      Vit      Pr
从这个数据集中,我想创建以下列:
axPr axVit axMin bxPr bxVit bxMin cxPr cxVit cxMin
1 0 1 0 1 0 0 0 0
0 0 0 0 0 1 0 1 0
0 1 0 0 0 0 2 0 0
2 0 0 0 1 0 0 0 0
0 1 0 0 0 0 1 0 0
1 0 0 0 2 0 0 0 0
这些数字代表每种食物与每种类型的组合在该行中出现的次数。提供的数据中的所有变量都是因子(factor)。
答案 0 :(得分:0)
使用 dplyr 和 tidyr:
library(dplyr)
library(tidyr)
# Count, for every input row, how often each Food x Type pair occurs and
# return one column per pair (named "<Food>x<Type>"), one line per row of df.
df %>%
  # Tag each input row with an id. The id is stored as a factor so that
  # count(.drop = FALSE) below also keeps rows whose Food/Type cells are all
  # NA; those rows disappear after values_drop_na and would otherwise be
  # missing from the result, leaving it misaligned with df.
  mutate(row = factor(row_number())) %>%
  # Long format: one line per (row, slot) with paired Food and Type columns
  pivot_longer(
    cols = -c(Age, row),
    names_to = c(".value", "col"),
    names_pattern = "(.*?)_(.*)",
    values_drop_na = TRUE
  ) %>%
  # Convert to factor so every Food/Type level is known to count()
  # (with dplyr < 1.0 use: mutate_at(vars(c(Food, Type)), factor))
  mutate(across(c(Food, Type), factor)) %>%
  # Count each occurrence of Food and Type, keeping zero-count combinations
  count(row, Food, Type, .drop = FALSE) %>%
  # Build the output column name, e.g. "axPr"
  unite(col, Food, Type, sep = "x") %>%
  # Wide format: one column per Food x Type pair
  pivot_wider(names_from = col, values_from = n, values_fill = list(n = 0)) %>%
  # The row id was only needed to keep rows apart
  select(-row)
# A tibble: 6 x 9
# axMin axPr axVit bxMin bxPr bxVit cxMin cxPr cxVit
# <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 1 1 0 0 0 1 0 0 0
#2 0 0 0 1 0 0 0 0 1
#3 0 0 1 0 0 0 0 2 0
#4 0 2 0 0 0 1 0 0 0
#5 0 0 1 0 0 0 0 1 0
#6 0 1 0 0 0 2 0 0 0
数据
# Sample data: one Age column plus three Food/Type slot pairs per row.
# NA marks an empty slot. Strings are kept as character, not factor.
df <- data.frame(
  Age = c(6L, 6L, 6L, 7L, 7L, 8L),
  Food_1_1 = c("a", NA, "c", "a", "c", "b"),
  Food_1_2 = c("b", "b", "c", "a", NA, "b"),
  Food_1_3 = c("a", "c", "a", "b", "a", "a"),
  Type_1_1 = c("Pr", NA, "Pr", "Pr", "Pr", "Vit"),
  Type_1_2 = c("Vit", "Min", "Pr", "Pr", NA, "Vit"),
  Type_1_3 = c("Min", "Vit", "Vit", "Vit", "Vit", "Pr"),
  stringsAsFactors = FALSE
)