使用 R 从一张表生成行重复的新表,并在原表基础上添加新列

时间:2017-02-14 14:11:55

标签: r

我需要根据原始表创建一张新表(两张表均见下文)。

在原始表中,有 A 和 B 两个家庭,每个家庭的成员由 PESS 列表示。每个属于受益人的家庭成员在 BEN 列中标记为 1。我需要从这张表生成一张新表,新表应多出 2 列(FAM2 和 I_BPC_FAM2)。以家庭 A 为例,成员 1 和成员 4 是受益人,因此家庭 A 应被复制成两组,每组只标记一个受益人(I_BPC_FAM2 列)。FAM2 列表示组的编号。

使用下面的代码,我已经能生成新表,但缺少 I_BPC_FAM2 列。这个问题必须在 R 中解决。

能否补全这段代码,以得到最终的表?

library(tidyverse) 
# Sample data: family (FAM), member number within the family (PESS) and
# beneficiary flag (BEN, 1 = beneficiary).
tabela <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

# One row per family: number of beneficiaries (contador) and members (cont).
tabela1 <- summarise(group_by(tabela, FAM), contador = sum(BEN), cont = n())

# For every family, build one copy of its member list (PESS = 1..cont) per
# beneficiary (FAM2 = 1..contador). seq_len() is used instead of 1:x so that a
# family without beneficiaries contributes no rows instead of iterating over
# c(1, 0). The pieces are collected in a list and bound once, rather than
# growing a data.frame with rbind() inside nested loops.
# NOTE: as in the original loop, this assumes PESS is numbered 1..cont.
pieces <- lapply(seq_len(nrow(tabela1)), function(i) {
  x <- as.numeric(tabela1$contador[i])
  j <- as.numeric(tabela1$cont[i])
  data.frame(
    FAM = rep(tabela1$FAM[i], x * j),
    PESS = as.numeric(rep(seq_len(j), times = x)),
    FAM2 = as.numeric(rep(seq_len(x), each = j))
  )
})
tab2 <- do.call(rbind, pieces)

# Attach BEN from the original table and sort by family and group. This is
# done once, after the expansion, instead of on every loop iteration.
final <- merge(tab2, tabela, by = c("FAM", "PESS"))
final <- final[order(final$FAM, final$FAM2), ]

原始表:

> tabela
  FAM PESS BEN
1   A    1   1
2   A    2   0
3   A    3   0
4   A    4   1
5   B    1   0
6   B    2   0
7   B    3   1

我的代码生成的表

> final
   FAM PESS FAM2 BEN
1    A    1    1   1
3    A    2    1   0
5    A    3    1   0
7    A    4    1   1
2    A    1    2   1
4    A    2    2   0
6    A    3    2   0
8    A    4    2   1
9    B    1    1   0
10   B    2    1   0
11   B    3    1   1

我需要生成的表

   FAM PESS FAM2 BEN I_BPC_FAM2
1    A    1    1   1         1
3    A    2    1   0         0
5    A    3    1   0         0 
7    A    4    1   1         0
2    A    1    2   1         0
4    A    2    2   0         0
6    A    3    2   0         0
8    A    4    2   1         1
9    B    1    1   0         0
10   B    2    1   0         0
11   B    3    1   1         1

4 个答案:

答案 0 :(得分:2)

这是另一种方式:

# Sample data: 4 members in family "A" followed by 3 members in family "B".
# PESS is the member number within the family, BEN flags the beneficiaries.
table <- data.frame(
  FAM = rep(c("A", "B"), times = c(4, 3)),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

为每个观察创建一个唯一的ID:

# Tag every row with its position so each member can be identified uniquely.
# Plain assignment is used instead of magrittr's `%<>%`, which is not attached
# by library(tidyverse) or library(dplyr) and would need library(magrittr).
table <- table %>% mutate(unique_id = row_number())

筛选出受益人,得到需要生成的各个家庭组(每个受益人对应一个组):

# One row per beneficiary: the beneficiary's unique_id becomes the group
# identifier FAM2, kept together with the family it belongs to.
ben <- table %>%
  filter(BEN == 1) %>%
  select(FAM2 = unique_id, FAM)

> ben
  FAM2 FAM
1    1   A
2    4   A
3    7   B

合并并比较ID:

# Pair every beneficiary group with all members of the same family, then flag
# the single row in each group whose member is that group's beneficiary.
new_table <- ben %>%
  merge(table, by = "FAM") %>%
  mutate(I_BPC_FAM2 = as.integer(FAM2 == unique_id)) %>%
  select(-unique_id)

结果是:

new_table
> new_table
  FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    4    1   1          0
6    A    4    2   0          0
7    A    4    3   0          0
8    A    4    4   1          1
9    B    7    1   0          0
10   B    7    2   0          0
11   B    7    3   1          1

如果需要,您可以转换新的家庭ID:

> new_table %>% mutate(FAM2 = as.integer(as.factor(FAM2)))
   FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    2    1   1          0
6    A    2    2   0          0
7    A    2    3   0          0
8    A    2    4   1          1
9    B    3    1   0          0
10   B    3    2   0          0
11   B    3    3   1          1

答案 1 :(得分:2)

关于尼古拉斯的代码,我会改变这一部分:

# Keep only the beneficiaries, remember each one's row identifier (ID) and
# number them 1, 2, ... within their family, so FAM2 restarts at 1 for every
# family. The steps are written as one pipeline with plain assignment, because
# magrittr's `%<>%` is not attached by library(tidyverse) or library(dplyr).
# ungroup() drops the grouping before the result is merged.
ben <- table %>%
  filter(BEN == 1) %>%
  mutate(ID = unique_id) %>%
  group_by(FAM) %>%
  mutate(FAM2 = cumsum(BEN)) %>%
  ungroup() %>%
  select(ID, FAM2, FAM)

# Pair every beneficiary group with all members of its family and flag the row
# whose member is that group's beneficiary; the helper IDs are then dropped.
new_table <- merge(ben, table, by = "FAM") %>%
  mutate(I_BPC_FAM2 = as.integer(unique_id == ID)) %>%
  select(-unique_id, -ID)

结果如下:

 > new_table
    FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    2    1   1          0
6    A    2    2   0          0
7    A    2    3   0          0
8    A    2    4   1          1
9    B    1    1   0          0
10   B    1    2   0          0
11   B    1    3   1          1

现在 FAM2 在每个家庭内部从 1 开始编号,其最大值就是该家庭的组数,因此可以方便地配合 max() 函数使用。

答案 2 :(得分:0)

这应该会有所帮助。其中一些命令或许可以合并,但我尽量把每一步都拆开来写,并避免使用 for 循环。请逐步运行这些链式命令,以了解它的工作原理。

library(dplyr)

# original dataset
dt <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

# create multiple rows of FAM based on how many 1s they have in column BEN:
# every member row is repeated once per beneficiary of its family, and the
# copies are numbered 1..sum_BEN in FAM2.
# slice() + rep() replaces the superseded do(); unlike seq(1, sum_BEN), it
# yields no rows (rather than FAM2 = 1, 0) for a family with no beneficiary.
tbl1 <- dt %>%
  group_by(FAM) %>%
  mutate(sum_BEN = sum(BEN)) %>%
  ungroup() %>%
  slice(rep(seq_len(n()), times = sum_BEN)) %>%
  group_by(FAM, PESS) %>%
  mutate(FAM2 = row_number()) %>%
  ungroup() %>%
  select(-sum_BEN) %>%
  arrange(FAM, FAM2)
print(tbl1)

# # A tibble: 11 × 4
#       FAM  PESS   BEN  FAM2
#    <fctr> <dbl> <dbl> <int>
# 1       A     1     1     1
# 2       A     2     0     1
# 3       A     3     0     1
# 4       A     4     1     1
# 5       A     1     1     2
# 6       A     2     0     2
# 7       A     3     0     2
# 8       A     4     1     2
# 9       B     1     0     1
# 10      B     2     0     1
# 11      B     3     1     1


# keep the relevant rows of FAM to put 1 for I_BPC_FAM2:
# the running count of beneficiaries within a family (cumsum_BEN) is the group
# number FAM2 in which that beneficiary is the flagged one.
tbl2 <- dt %>%
  arrange(FAM, PESS) %>%
  group_by(FAM) %>%
  mutate(cumsum_BEN = cumsum(BEN)) %>%
  ungroup() %>%
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned
  distinct(FAM, BEN, cumsum_BEN, .keep_all = TRUE) %>%
  filter(BEN != 0) %>%
  mutate(I_BPC_FAM2 = 1) %>%
  rename(FAM2 = cumsum_BEN)
print(tbl2)

# # A tibble: 3 × 5
#      FAM  PESS   BEN  FAM2 I_BPC_FAM2
#   <fctr> <dbl> <dbl> <dbl>      <dbl>
# 1      A     1     1     1          1
# 2      A     4     1     2          1
# 3      B     3     1     1          1


# join tables: rows of tbl1 that have no match in tbl2 are not the flagged
# beneficiary of their group, so their missing I_BPC_FAM2 becomes 0
left_join(tbl1, tbl2, by = c("FAM", "PESS", "BEN", "FAM2")) %>%
  mutate(I_BPC_FAM2 = if_else(is.na(I_BPC_FAM2), 0, I_BPC_FAM2)) %>%
  arrange(FAM, FAM2)

# # A tibble: 11 × 5
#       FAM  PESS   BEN  FAM2 I_BPC_FAM2
#    <fctr> <dbl> <dbl> <dbl>      <dbl>
# 1       A     1     1     1          1
# 2       A     2     0     1          0
# 3       A     3     0     1          0
# 4       A     4     1     1          0
# 5       A     1     1     2          0
# 6       A     2     0     2          0
# 7       A     3     0     2          0
# 8       A     4     1     2          1
# 9       B     1     0     1          0
# 10      B     2     0     1          0
# 11      B     3     1     1          1

答案 3 :(得分:0)

以下是一个基础 R 解决方案,它采用拆分-应用-合并(split-apply-combine)的方法,用到 split、lapply 以及 do.call / rbind。

下面的代码先为每个家庭构造一个 data.frame 并存入列表;之后,您可以用 do.call(rbind, myList) 将列表合并在一起:
# Build a list of data.frames, one for each family.
# `df` is the input data.frame with columns FAM, PESS and BEN.
myList <- lapply(split(df, df$FAM), function(i) {
  bens <- which(i$BEN == 1) # row positions of the beneficiaries in this family
  rows <- nrow(i) # number of members in this family
  # Stack one full copy of the family per beneficiary.
  i <- i[rep(seq_len(rows), length(bens)), ]
  # Start with every row unflagged; rep() keeps this valid even when the
  # family has no beneficiary and `i` therefore has zero rows.
  i$I_BPC_FAM2 <- rep(0, nrow(i))
  # In the k-th copy (rows offset by rows * (k - 1)), flag the k-th beneficiary.
  i$I_BPC_FAM2[bens + rows * (seq_along(bens) - 1)] <- 1
  i # return the expanded data.frame
})

# Combine the per-family pieces into a single data.frame.
do.call(rbind, myList)