在不同的数据框中创建相同的变量

时间:2019-10-03 14:35:39

标签: r loops lapply

我有14个列名和列数都相同的数据框。我想在每个数据框中创建一个相同的变量(hi130_eur = hi130 / fixrate)。

我首先尝试了一个循环:

# Collect the 14 country data frames into a single (unnamed) list.
countries<-list(country2, country3, country4, country6, country8, country10, country11, country12,
country13, country14, country15, country51, country55, country57)

# NOTE: this loop fails. Single-bracket indexing `countries[i]` yields a
# one-element list, not the data frame stored in it, so group_by() is
# dispatched on a list. `countries[[i]]` is what extracts the data frame.
for(i in 1:length(countries)) { 
     countries[i] <- countries[i] %>% group_by(wave) %>% mutate(hi130_eur=hi130/fixrate) 
  }

其中group_by中的wave是每个data.frame中标识调查波数的列的名称。 但是,它报告以下错误:

  

Error in UseMethod("group_by_") : no applicable method for 'group_by_' applied to an object of class "list"

因此,我还通过以下方式尝试了lapply:

# NOTE: this attempt fails. mutate() names the new column hi100_eur, while
# cbind() then refers to hi130_eur, which is neither a column created here nor
# a variable defined in this function. The list returned by lapply() is also
# never assigned to anything.
lapply(countries, function(x) { 
x<-x %>% group_by(wave) %>% mutate(hi100_eur=hi130/fixrate)
   cbind(x, hi130_eur)
   return(x)
   })

但是报告找不到hi130_eur。
关于如何在每个数据帧中正确创建相同变量的任何建议?

每个数据帧具有以下结构:

 structure(list(country = c(2L, 2L, 2L, 2L, 2L, 2L), wave = c(1L, 
1L, 1L, 1L, 1L, 1L), hid = c(7601L, 8401L, 10701L, 15701L, 15701L, 
16501L), hg004 = c(1.07908, 1.47759, 1.24982, 0.94703001, 0.94703001, 
0.92088997), hg005 = c(-9L, -9L, -9L, -9L, -9L, -9L), hg006 = c(-9L, 
-9L, -9L, -9L, -9L, -9L), hg007 = c(-9L, -9L, -9L, -9L, -9L, 
-9L), hg014 = c(1994L, 1994L, 1994L, 1994L, 1994L, 1994L), hd001 = c(1L, 
1L, 1L, 3L, 3L, 1L), hd002 = c(1L, 1L, 1L, 2L, 2L, 1L), hd003 = c(1L, 
1L, 1L, 2L, 2L, 1L), hd004 = c(1, 1, 1, 2.2, 2.2, 1), hd005 = c(1, 
1, 1, 1.8, 1.8, 1), hd006 = c(2L, 1L, 3L, 8L, 8L, 2L), hd006a = c(2L, 
3L, 4L, 11L, 11L, 5L), hd006b = c(1L, 2L, 3L, 11L, 11L, 3L), 
hd007 = c(-8L, -8L, -8L, -8L, -8L, -8L), hd008 = c(-8L, -8L, 
-8L, -8L, -8L, -8L), hd009 = c(-8L, -8L, -8L, -8L, -8L, -8L
), hd010 = c(-8L, -8L, -8L, -8L, -8L, -8L), hi001 = c(4L, 
1L, 1L, 1L, 1L, 1L), hi020 = c(0.58311999, 0.57414001, 0.57635999, 
0.6074, 0.6074, 0.56733), hi100 = c(97000L, 126550L, 90000L, 
249500L, 249500L, 156166L), hi110 = c(13000L, 106000L, 90000L, 
185500L, 185500L, 156166L), hi111 = c(13000L, 106000L, 90000L, 
185500L, 185500L, 156166L), hi112 = c(0L, 0L, 0L, 0L, 0L, 
0L), hi120 = c(0L, 0L, 0L, 0L, 0L, 0L), hi121 = c(0L, 0L, 
0L, 0L, 0L, 0L), hi122 = c(0L, 0L, 0L, 0L, 0L, 0L), hi122g = c(0L, 
0L, 0L, 0L, 0L, 0L), hi123 = c(0L, 0L, 0L, 0L, 0L, 0L), hi130 = c(84000L, 
20550L, 0L, 64000L, 64000L, 0L), hi131 = c(84000L, 0L, 0L, 
0L, 0L, 0L), hi132 = c(0L, 6150L, 0L, 0L, 0L, 0L), hi133 = c(0L, 
0L, 0L, 64000L, 64000L, 0L), hi134 = c(0L, 0L, 0L, 0L, 0L, 
0L), hi135 = c(0L, 0L, 0L, 0L, 0L, 0L), hi136 = c(0L, 0L, 
0L, 0L, 0L, 0L), hi137 = c(0L, 0L, 0L, 0L, 0L, 0L), hi138 = c(0L, 
14400L, 0L, 0L, 0L, 0L), ha005 = c(4L, 4L, 4L, 4L, 4L, 2L
), ha006 = c(2L, 2L, 2L, 3L, 3L, 3L), ha007 = c(3L, 3L, 3L, 
4L, 4L, 4L), ha008 = c(1L, 1L, 1L, 1L, 1L, 1L), ha009 = c(1L, 
1L, 1L, 1L, 1L, 1L), ha010 = c(1L, 1L, 1L, 1L, 1L, 1L), ha011 = c(1L, 
1L, 1L, 1L, 1L, 1L), ha012 = c(2L, 1L, 1L, 1L, 1L, 1L), ha013 = c(2L, 
1L, 2L, 2L, 2L, 1L), ha014 = c(1L, 2L, 2L, 1L, 1L, 2L), ha015b = c(-8L, 
-8L, -8L, -8L, -8L, -8L), ha022 = c(2L, 2L, 2L, 2L, 2L, 2L
), ha023 = c(1L, 2L, 2L, 1L, 1L, 1L), ha024a = c(1L, -8L, 
-8L, 1L, 1L, 1L), ha024b = c(1180L, -8L, -8L, 1000L, 1000L, 
800L), ha025 = c(-8L, -8L, -8L, -8L, -8L, -8L), ha026 = c(-8L, 
1L, 1L, -8L, -8L, -8L), ha027 = c(-8L, 3400L, 2150L, -8L, 
-8L, -8L), ha029 = c(-8L, 1L, 2L, -8L, -8L, -8L), ha030 = c(-8L, 
2L, 2L, -8L, -8L, -8L), ha031 = c(-8L, 1L, 1L, -8L, -8L, 
-8L), ha032 = c(-8L, 2L, 2L, -8L, -8L, -8L), ha033 = c(-8L, 
2L, 2L, -8L, -8L, -8L), ha034 = c(-8L, 2L, 2L, -8L, -8L, 
-8L), ha035 = c(-8L, 2L, 2L, -8L, -8L, -8L), hl001 = c(-8L, 
-8L, -8L, -8L, -8L, -8L), hl002 = c(-8L, -8L, -8L, -8L, -8L, 
-8L), hl003 = c(-8L, -8L, -8L, -8L, -8L, -8L), pid = c(76101, 
84101, 107101, 157101, 157102, 165101), pg002 = c(1.07218, 
1.4762599, 1.26633, 0.94999999, 0.94999999, 0.87747997), 
pg003 = c(1.07218, 1.4762599, 1.26633, 0.94999999, 0.94999999, 
0.87747997), pd003 = c(42L, 70L, 22L, 33L, 33L, 46L), pd004 = c(1L, 
1L, 2L, 2L, 1L, 2L), pe001 = c(7L, 1L, 7L, 1L, 1L, 1L), pe003 = c(5L, 
1L, 3L, 1L, 1L, 1L), pe004 = c(-8L, 1L, -8L, 1L, 1L, 1L), 
pe005 = c(-8L, 30L, -8L, 37L, 37L, 37L), pe005b = c(-8L, 
-8L, -8L, -8L, -8L, -8L), pe005c = c(-8L, 1L, -8L, 1L, 1L, 
1L), pe014 = c(-8L, -8L, -8L, 2L, 2L, -8L), pe015 = c(-8L, 
-8L, -8L, 0L, 0L, -8L), pe024 = c(-8L, -8L, -8L, -8L, -8L, 
-8L), pe025 = c(-8L, -8L, -8L, -8L, -8L, -8L), pe026 = c(-8L, 
2L, -8L, 2L, 2L, 2L), pe030 = c(-8L, 2L, -8L, 2L, 2L, 2L), 
pu001 = c(1L, 2L, 2L, 2L, 2L, 2L), pu004 = c(1L, -8L, 2L, 
-8L, -8L, -8L), pu004a = c(1L, -8L, 2L, -8L, -8L, -8L), ps002 = c(20L, 
-8L, 20L, -8L, -8L, -8L), ps006 = c(-8L, -8L, -8L, -8L, -8L, 
-8L), pi001 = c(4L, 1L, 1L, 5L, 1L, 1L), pi100 = c(97000L, 
126550L, 90000L, 118000L, 131500L, 156166L), pi110 = c(13000L, 
106000L, 90000L, 54000L, 131500L, 156166L), pi112 = c(0L, 
0L, 0L, 0L, 0L, 0L), pi121 = c(0L, 0L, 0L, 0L, 0L, 0L), pi122a = c(0L, 
0L, 0L, 0L, 0L, 0L), pi123 = c(0L, 0L, 0L, 0L, 0L, 0L), pi130 = c(84000L, 
20550L, 0L, 64000L, 0L, 0L), pi131 = c(84000L, 0L, 0L, 0L, 
0L, 0L), pi132 = c(0L, 6150L, 0L, 0L, 0L, 0L), pi133 = c(0L, 
0L, 0L, 64000L, 0L, 0L), pi134 = c(0L, 0L, 0L, 0L, 0L, 0L
), pi135 = c(0L, 0L, 0L, 0L, 0L, 0L), pi136 = c(0L, 0L, 0L, 
0L, 0L, 0L), pi137a = c(0L, 0L, 0L, 0L, 0L, 0L), pi138a = c(0L, 
14400L, 0L, 0L, 0L, 0L), pt022 = c(2L, 2L, 2L, 2L, 2L, 1L
), pt023 = c(-9L, 20L, 19L, 28L, 20L, 23L), pt024 = c(-8L, 
-8L, -8L, -8L, -8L, -8L), ph006 = c(2L, 2L, 1L, 2L, 2L, 2L
), ph007 = c(0L, 0L, 2L, 0L, 0L, 0L), ph008 = c(-8L, -8L, 
-8L, -8L, -8L, -8L), ph009 = c(-8L, -8L, -8L, -8L, -8L, -8L
), ph012 = c(-8L, -8L, -8L, -8L, -8L, -8L), ph013 = c(3L, 
3L, 3L, 3L, 3L, 1L), ph022 = c(-8, -8, -8, -8, -8, -8), pr006 = c(4L, 
4L, 4L, 1L, 1L, 2L), pr007 = c(-8L, -8L, -8L, 3L, 3L, -8L
), pr008 = c(-8L, -8L, -8L, -8L, -8L, 1L), weight_prsn = c(1.07908, 
1.47759, 1.24982, 2.84109, 2.84109, 0.92088997), fixrate = c(1, 
1, 1, 1, 1, 1), hi100_eur = c(97000, 126550, 90000, 249500, 
249500, 156166), pareto = c(378337.646856238, 645715.836409999, 
373027.903072689, 392088.525948343, 459127.982320312, 363013.212050222
), gpd = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, 6L
), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

考虑到这些评论,我决定重新编写我认为可以解决您的问题的内容,如下所示。

样本数据

您的数据集中有124个变量,但要演示解决该问题的一种可行方法,country、wave、hi130 和 fixrate 这几列就足够了。因此,下面给出两个示例数据框对象(country1 和 country2)。

#dput(country1)
structure(list(country = c(2L, 2L, 2L, 2L, 2L, 2L), wave = c(1L, 
1L, 1L, 1L, 1L, 1L), hi130 = c(84000L, 20550L, 0L, 64000L, 64000L, 
0L), fixrate = c(1, 1, 1, 1, 1, 1)), class = "data.frame", row.names = c(NA, 
6L))
# -------------------------------------------------------------------------
#   country wave hi130 fixrate
# 1       2    1 84000       1
# 2       2    1 20550       1
# 3       2    1     0       1
# 4       2    1 64000       1
# 5       2    1 64000       1
# 6       2    1     0       1
# -------------------------------------------------------------------------
#dput(country2)
structure(list(country = c(1, 1, 1, 1, 1, 1), wave = c(2, 2, 
2, 2, 2, 2), hi130 = c(0, 59800, 20440, 19181, 121213, 0), fixrate = c(3, 
3, 3, 3, 3, 3)), class = "data.frame", row.names = c(NA, -6L))
# -------------------------------------------------------------------------
#   country wave  hi130 fixrate
# 1       1    2      0       3
# 2       1    2  59800       3
# 3       1    2  20440       3
# 4       1    2  19181       3
# 5       1    2 121213       3
# 6       1    2      0       3

函数

以下函数用于获得所需的输出。

library(dplyr)
#' Add a `hi130_eur` column (one column divided by another) to a data frame.
#'
#' @param df A data frame containing the columns named by `grp`, `hi130`
#'   and `fixrate`.
#' @param grp Character vector naming the column(s) to group by.
#' @param hi130 Name (string) of the numerator column.
#' @param fixrate Name (string) of the denominator column.
#' @return `df` as a tibble grouped by `grp`, with an extra column
#'   `hi130_eur` holding the `hi130` column divided by the `fixrate` column.
my_func <- function(df, grp="wave", hi130="hi130", fixrate="fixrate") {
  # Fail early with a readable message when a requested column is absent.
  required <- c(grp, hi130, fixrate)
  absent <- setdiff(required, names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  df %>%
    # group_by_() is deprecated; splice the column names in as symbols.
    group_by(!!!syms(grp)) %>%
    # Look the columns up by the names passed as arguments, so that
    # non-default `hi130` / `fixrate` values are actually honoured.
    mutate(hi130_eur = .data[[hi130]] / .data[[fixrate]])
}
#Example usage
my_func(country1)
# -------------------------------------------------------------------------
my_func(country1)
# # A tibble: 6 x 5
# # Groups:   wave [1]
#   country  wave hi130 fixrate hi130_eur
#     <int> <int> <int>   <dbl>     <dbl>
# 1       2     1 84000       1     84000
# 2       2     1 20550       1     20550
# 3       2     1     0       1         0
# 4       2     1 64000       1     64000
# 5       2     1 64000       1     64000
# 6       2     1     0       1         0
# 

应用于数据框对象列表

这是将函数应用于数据框对象列表的方法。

# Add your dataframe objects to the list below. Each element is named after
# its object so results can be matched by name rather than by position.
countries <- list(country1 = country1, country2 = country2)

# Apply my_func to every data frame and save the output to df_list;
# lapply() carries the element names of `countries` over to the result.
df_list <- lapply(countries, my_func)

您可以用列表df_list中的结果覆盖原来的各个数据框对象(country1、country2 等),如下所示。

# Write each result back to a variable in the current environment.
# If df_list has element names, those are used as the variable names, so
# objects that are not numbered 1..n are not written to the wrong variable.
# Unnamed elements fall back to the positional name "country<i>".
result_names <- names(df_list)
if (is.null(result_names)) {
  result_names <- character(length(df_list))
}
for (i in seq_along(df_list)) {
  has_name <- !is.na(result_names[[i]]) && nzchar(result_names[[i]])
  target <- if (has_name) result_names[[i]] else paste0("country", i)
  assign(target, df_list[[i]])
}
# -------------------------------------------------------------------------
# country2
# # A tibble: 6 x 5
# # Groups:   wave [1]
#   country  wave  hi130 fixrate hi130_eur
#     <dbl> <dbl>  <dbl>   <dbl>     <dbl>
# 1       1     2      0       3        0 
# 2       1     2  59800       3    19933.
# 3       1     2  20440       3     6813.
# 4       1     2  19181       3     6394.
# 5       1     2 121213       3    40404.
# 6       1     2      0       3        0 
# 

您可以检查其他数据框对象;现在它们都会有hi130_eur列。解决这个问题可能还有更高效的方法,但这是我目前想到的方法。请参阅?assign,以了解其功能。

让我知道您是否仍然遇到问题。