我需要根据原始表创建一个新表(两个表都在下面给出)。
在原始表中,有A和B两个家庭(FAM列),每个家庭的成员由PESS列表示。家庭中的每个受益人在BEN列中标记为1。基于这个表,我需要生成一个新表,其中应新增2个列(FAM2 和 I_BPC_FAM2)。以家庭A为例,成员1和成员4是受益人。因此,家庭A应当被复制成两组,每组只标记一个受益人(I_BPC_FAM2列)。FAM2列表示组的编号。
使用下面的代码,我已经可以生成新表,但是其中缺少 I_BPC_FAM2 列。问题必须在R中解决。
是否可以补全这段代码,以得到最终的表?
library(tidyverse)
# Example data: families (FAM), their members (PESS) and a beneficiary
# flag (BEN, 1 = beneficiary).
tabela <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

# Per-family counters: contador = number of beneficiaries, cont = family size.
tabela1 <- summarise(group_by(tabela, FAM), contador = sum(BEN), cont = n())

# For every family build one copy of its member list (PESS = 1..cont) per
# beneficiary (FAM2 = 1..contador). The pieces are collected in a
# preallocated list and bound once, instead of growing a data.frame with
# rbind() one row at a time.
blocos <- vector("list", nrow(tabela1))
for (i in seq_len(nrow(tabela1))) {
  x <- tabela1$contador[[i]]
  j <- tabela1$cont[[i]]
  # seq_len() gives an empty sequence for a count of 0, whereas 1:0 would
  # iterate over c(1, 0) and create bogus rows.
  blocos[[i]] <- data.frame(
    FAM = rep(tabela1$FAM[i], x * j),
    PESS = as.numeric(rep(seq_len(j), times = x)),
    FAM2 = as.numeric(rep(seq_len(x), each = j))
  )
}
tab2 <- do.call(rbind, blocos)

# Attach BEN to every (FAM, PESS) pair and sort by family and group.
# This is done once, after the loop, rather than on every iteration.
final <- merge(tab2, tabela, by = c("FAM", "PESS"))
final <- final[order(final$FAM, final$FAM2), ]
原始表:
> tabela
FAM PESS BEN
1 A 1 1
2 A 2 0
3 A 3 0
4 A 4 1
5 B 1 0
6 B 2 0
7 B 3 1
我的代码生成的表
> final
FAM PESS FAM2 BEN
1 A 1 1 1
3 A 2 1 0
5 A 3 1 0
7 A 4 1 1
2 A 1 2 1
4 A 2 2 0
6 A 3 2 0
8 A 4 2 1
9 B 1 1 0
10 B 2 1 0
11 B 3 1 1
我需要生成的表
FAM PESS FAM2 BEN I_BPC_FAM2
1 A 1 1 1 1
3 A 2 1 0 0
5 A 3 1 0 0
7 A 4 1 1 0
2 A 1 2 1 0
4 A 2 2 0 0
6 A 3 2 0 0
8 A 4 2 1 1
9 B 1 1 0 0
10 B 2 1 0 0
11 B 3 1 1 1
答案 0 :(得分:2)
这是另一种方式:
# Example data: one row per family member.
table <- data.frame(
  FAM = rep(c("A", "B"), times = c(4, 3)),
  PESS = c(1, 2, 3, 4,   # family A
           1, 2, 3),     # family B
  BEN = c(1, 0, 0, 1,    # family A: members 1 and 4 are beneficiaries
          0, 0, 1)       # family B: member 3 is the beneficiary
)
为每条观测记录(每一行)创建一个唯一的ID:
# Give every row a unique id. Explicit assignment is used instead of the
# compound-assignment pipe `%<>%`, which is only available after
# library(magrittr) and is not attached by library(tidyverse) or
# library(dplyr).
table <- table %>%
  mutate(unique_id = row_number())
取出您想要得到的各个唯一家庭组(即受益人所在的行):
# One row per beneficiary: the beneficiary's row id becomes the id of the
# new family group (FAM2), kept together with the original family (FAM).
ben <- table %>%
  filter(BEN == 1) %>%
  select(FAM2 = unique_id, FAM)
> ben
FAM2 FAM
1 1 A
2 4 A
3 7 B
合并并比较ID:
# Pair every beneficiary group with all members of the same family.
new_table <- merge(x = ben, y = table, by = "FAM")
# Flag the single member whose row id equals the group's beneficiary id.
new_table <- mutate(new_table, I_BPC_FAM2 = as.integer(FAM2 == unique_id))
# The helper id is no longer needed.
new_table <- select(new_table, -unique_id)
结果是:
new_table
> new_table
FAM FAM2 PESS BEN I_BPC_FAM2
1 A 1 1 1 1
2 A 1 2 0 0
3 A 1 3 0 0
4 A 1 4 1 0
5 A 4 1 1 0
6 A 4 2 0 0
7 A 4 3 0 0
8 A 4 4 1 1
9 B 7 1 0 0
10 B 7 2 0 0
11 B 7 3 1 1
如果需要,您可以转换新的家庭ID:
> new_table %>% mutate(FAM2 = as.integer(as.factor(FAM2)))
FAM FAM2 PESS BEN I_BPC_FAM2
1 A 1 1 1 1
2 A 1 2 0 0
3 A 1 3 0 0
4 A 1 4 1 0
5 A 2 1 1 0
6 A 2 2 0 0
7 A 2 3 0 0
8 A 2 4 1 1
9 B 3 1 0 0
10 B 3 2 0 0
11 B 3 3 1 1
答案 1 :(得分:2)
关于尼古拉斯的代码,我会改变这一部分:
# Keep only the beneficiaries and remember each one's row id as ID.
# FAM2 numbers the beneficiaries 1, 2, ... within each family (cumsum of
# BEN, which is 1 on every remaining row). The result is ungrouped so the
# grouping does not leak into later steps, and plain assignment replaces
# `%<>%`, which needs library(magrittr).
ben <- table %>%
  filter(BEN == 1) %>%
  mutate(ID = unique_id) %>%
  group_by(FAM) %>%
  mutate(FAM2 = cumsum(BEN)) %>%
  ungroup() %>%
  select(ID, FAM2, FAM)

# Pair every beneficiary with all members of its family, then flag the one
# row that is the beneficiary itself and drop the helper ids.
new_table <- merge(ben, table, by = "FAM") %>%
  mutate(I_BPC_FAM2 = as.integer(unique_id == ID)) %>%
  select(-unique_id, -ID)
结果如下:
> new_table
FAM FAM2 PESS BEN I_BPC_FAM2
1 A 1 1 1 1
2 A 1 2 0 0
3 A 1 3 0 0
4 A 1 4 1 0
5 A 2 1 1 0
6 A 2 2 0 0
7 A 2 3 0 0
8 A 2 4 1 1
9 B 1 1 0 0
10 B 1 2 0 0
11 B 1 3 1 1
现在我们的 FAM2 在每个家庭内从1开始编号,这对于使用函数 max() 来说很实用。
答案 2 :(得分:0)
这应该会有所帮助。其中一些命令或许可以合并,但我尽量把每一步都拆开写清楚,并避免使用for循环。请逐步运行链式命令,以查看其工作原理。
library(dplyr)
# original dataset: one row per family member
dt <- data.frame(
  FAM = rep(c("A", "B"), times = c(4, 3)),
  PESS = c(1, 2, 3, 4,   # family A
           1, 2, 3),     # family B
  BEN = c(1, 0, 0, 1,    # family A: members 1 and 4 are beneficiaries
          0, 0, 1)       # family B: member 3 is the beneficiary
)
# create multiple rows of FAM based on how many 1s they have in column BEN
tbl1 <- dt %>%
  group_by(FAM) %>%
  mutate(sum_BEN = sum(BEN)) %>%
  group_by(FAM, PESS) %>%
  # Repeat each member row once per beneficiary of its family and number
  # the copies. seq_len() is used because seq(1, 0) would count down and
  # give c(1, 0) for a family without beneficiaries.
  do(data.frame(.[rep(1L, .$sum_BEN), ], FAM2 = seq_len(.$sum_BEN))) %>%
  select(-sum_BEN) %>%
  ungroup() %>%
  arrange(FAM, FAM2)
print(tbl1)
# # A tibble: 11 × 4
# FAM PESS BEN FAM2
# <fctr> <dbl> <dbl> <int>
# 1 A 1 1 1
# 2 A 2 0 1
# 3 A 3 0 1
# 4 A 4 1 1
# 5 A 1 1 2
# 6 A 2 0 2
# 7 A 3 0 2
# 8 A 4 1 2
# 9 B 1 0 1
# 10 B 2 0 1
# 11 B 3 1 1
# keep the relevant rows of FAM to put 1 for I_BPC_FAM2
tbl2 <- dt %>%
  arrange(FAM, PESS) %>%
  group_by(FAM) %>%
  # Running count of beneficiaries: the k-th beneficiary of a family gets k,
  # which is the group (FAM2) in which that member is flagged.
  mutate(cumsum_BEN = cumsum(BEN)) %>%
  ungroup() %>%
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  distinct(FAM, BEN, cumsum_BEN, .keep_all = TRUE) %>%
  filter(BEN != 0) %>%
  mutate(I_BPC_FAM2 = 1) %>%
  rename(FAM2 = cumsum_BEN)
print(tbl2)
# # A tibble: 3 × 5
# FAM PESS BEN FAM2 I_BPC_FAM2
# <fctr> <dbl> <dbl> <dbl> <dbl>
# 1 A 1 1 1 1
# 2 A 4 1 2 1
# 3 B 3 1 1 1
# join tables: rows of tbl1 that match a beneficiary/group pair in tbl2
# receive I_BPC_FAM2 = 1; every other row has no match (NA) and is set to 0
tbl1 %>%
  left_join(tbl2, by = c("FAM", "PESS", "BEN", "FAM2")) %>%
  mutate(I_BPC_FAM2 = if_else(is.na(I_BPC_FAM2), 0, I_BPC_FAM2)) %>%
  arrange(FAM, FAM2)
# # A tibble: 11 × 5
# FAM PESS BEN FAM2 I_BPC_FAM2
# <fctr> <dbl> <dbl> <dbl> <dbl>
# 1 A 1 1 1 1
# 2 A 2 0 1 0
# 3 A 3 0 1 0
# 4 A 4 1 1 0
# 5 A 1 1 2 0
# 6 A 2 0 2 0
# 7 A 3 0 2 0
# 8 A 4 1 2 1
# 9 B 1 0 1 0
# 10 B 2 0 1 0
# 11 B 3 1 1 1
答案 3 :(得分:0)
以下是使用拆分-应用-合并(split-apply-combine)方法的基础R解决方案,
用到了 split、lapply 和 do.call / rbind。
先为每个家庭构造一个 data.frame,得到一个列表;之后,您可以用 do.call 和 rbind 将该列表合并在一起。
# construct list of data.frames, one for each family
# Build a list with one expanded data.frame per family (FAM): each family is
# stacked once per beneficiary, and in the k-th copy only the k-th
# beneficiary gets I_BPC_FAM2 = 1.
# NOTE(review): `df` is not defined in this snippet — presumably it is the
# input table with columns FAM, PESS and BEN; confirm before running.
myList <- lapply(split(df, df$FAM), function(fam) {
  bens <- which(fam$BEN == 1)  # row positions of the beneficiaries
  rows <- nrow(fam)            # number of members in the family
  # grow data.frame: one full copy of the family per beneficiary
  out <- fam[rep(seq_len(rows), length(bens)), ]
  # Initialize with zeros sized to `out`, so a family without beneficiaries
  # yields an empty data.frame instead of an assignment error.
  out$I_BPC_FAM2 <- numeric(nrow(out))
  # fill in indicator: the k-th copy starts at offset rows * (k - 1)
  out$I_BPC_FAM2[bens + rows * (seq_along(bens) - 1)] <- 1
  out  # return new data.frame
})