我有两个数据框,我正在尝试从中创建新数据。
df 1的colnames:
"year_t" "data_t" "data_t_1" "data_t_2" "data_t_3"
df 2的colnames:
"year" "multiplyer"
我想用df2中的multiplyer去除df1中的数据。具体做法是先按年份匹配:将data_t除以df2中对应年份(year_t)的multiplyer,然后将data_t_1除以前一年(year_t - 1)的multiplyer,将data_t_2除以前两年(year_t - 2)的multiplyer,依此类推。
output_t = data_t / multiplyer(year_t)
output_t_1 = data_t_1 / multiplyer(year_t - 1)
output_t_2 = data_t_2 / multiplyer(year_t - 2)
output_t_3 = data_t_3 / multiplyer(year_t - 3)
即(以df1的第一行为例):
year_t data_t data_t_1 data_t_2 data_t_3
1 2012 146123 162991 308060 406563
在df2
2009 98.2319416221847
2010 100.000000000000000000000000000000
2011 103.196146412241
2012 105.720324344817
我想要得到以下内容;
df1$output_t <- 146123 / 105.720324344817
df1$output_t_1 <- 162991 / 103.196146412241
df1$output_t_2 <- 308060 / 100.000000000000000000000000000000
df1$output_t_3 <- 406563 / 98.2319416221847
也就是说:
output_t = data_t / multiplyer(2012年)
output_t_1 = data_t_1 / multiplyer(2011年,即year_t - 1)
output_t_2 = data_t_2 / multiplyer(2010年,即year_t - 2)
output_t_3 = data_t_3 / multiplyer(2009年,即year_t - 3)
数据框1:
structure(list(year_t = structure(c(18L, 16L, 3L, 7L, 21L, 15L,
2L, 21L, 2L, 17L, 17L, 3L, 14L, 13L, 15L, 18L, 19L, 14L, 13L,
14L, 16L, 21L, 12L, 11L, 19L, 17L, 2L, 5L, 15L, 19L, 19L, 9L,
17L, 19L, 8L, 14L, 4L, 18L, 16L, 17L, 4L, 19L, 15L, 17L, 8L,
17L, 18L, 19L, 6L, 15L), .Label = c("1995", "1996", "1997", "1998",
"1999", "2000", "2001", "2002", "2003", "2004", "2005", "2006",
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014",
"2015", "2016", "2017"), class = "factor"), data_t = c(146123L,
824675L, 78601L, 338308L, 12321527L, 301767L, 261683L, 20810L,
438264L, 420102L, 54325L, 1557915L, 6229156L, 3312145L, 6577744L,
1633416L, 475845L, 851586L, 658845L, 6216087L, 2069090L, 28046L,
622088L, 244276L, 1796582L, 7607498L, 2680537L, 195115L, 1325434L,
870810L, 399998L, 1755193L, 39280000L, 2612835L, 2643000L, 5660759L,
201188L, 1855899L, 2393468L, 278147L, 1291212L, 4082284L, 7051999L,
15342597L, 35949L, 1606024L, 2448224L, 91427L, 1054759L, 121252L
), data_t_1 = c(162991L, 1278341L, 433815L, 315210L, 13280222L,
144622L, 280714L, 184286L, 349399L, 441119L, 51123L, 1112556L,
3240105L, 2467071L, 9515093L, 1686249L, 461023L, 927146L, 507399L,
3561613L, 1929679L, 69828L, 622204L, 242895L, 1968208L, 8453347L,
2467278L, 92742L, 1194816L, 1197646L, 547391L, 1845368L, 38550000L,
4555685L, 5158000L, 6324394L, 241155L, 2420718L, 3180737L, 5768459L,
1548164L, 4318517L, 9019486L, 14386327L, 35934L, 2044495L, 2598361L,
135402L, NA, 336379L), data_t_2 = c(308060L, 1746234L, 2473258L,
249339L, 14327822L, NA, 259635L, 455523L, 370401L, 455568L, 40985L,
1321363L, 1449123L, 1928196L, 9661314L, 2367151L, 375473L, 1228645L,
420788L, 99090L, 1976669L, 150717L, NA, 271140L, 3995829L, 8166218L,
2175989L, 87277L, 1097358L, NA, 788137L, 1695421L, 39801000L,
4372307L, 142504L, 3439554L, 114912L, 3388745L, 2834629L, 7034688L,
1462947L, 4537559L, 8000863L, 12737184L, 150782L, 2333824L, 2710126L,
254109L, NA, 1140718L), data_t_3 = c(406563L, 1769192L, NA, 212706L,
14351345L, NA, 238441L, 888216L, 255452L, 488883L, 97195L, 1106291L,
35366L, 1388799L, 7684599L, 2425390L, 4152L, 953383L, 542362L,
NA, 2499211L, 167215L, NA, 145238L, 8647716L, 7866078L, 1874842L,
NA, 2158139L, NA, 736088L, 1425002L, 44633000L, 3831578L, 159060L,
2781418L, 85171L, 3159740L, 2463385L, 8038953L, 1346105L, 4578169L,
6277353L, 12501786L, 61561L, 2966259L, 2733420L, 298200L, NA,
697574L)), .Names = c("year_t", "data_t", "data_t_1", "data_t_2",
"data_t_3"), row.names = c(NA, 50L), class = "data.frame")
数据框2:
structure(list(year = c("1988", "1989", "1990", "1991", "1992",
"1993", "1994", "1995", "1996", "1997", "1998", "1999", "2000",
"2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008",
"2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016",
"2017"), multiplyer = c("45.5158032501528", "48.6069799436928",
"51.8742543668362", "54.9525837849594", "58.208266424359", "60.8678441879755",
"63.7398405992427", "66.7189155811034", "69.093337017433", "70.4552205020515",
"71.7476020867733", "73.4052195938148", "75.9256001089651", "78.6512027854055",
"81.0624649001436", "83.5261426387297", "86.064670544112", "88.9637193712232",
"92.0911276421223", "94.6577354325631", "98.5156636153782", "98.2319416221847",
"100.000000000000000000000000000000", "103.196146412241", "105.720324344817",
"107.209444053708", "107.047696829365", "106.511964511277", "106.296094858635",
"108.375335054149")), .Names = c("year", "multiplyer"), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:2)
如果第一个表名为df1,第二个表名为df2,则应该为您提供所需内容:
library(tidyverse)

# For every row of df1, divide data_t, data_t_1, data_t_2, ... by the df2
# multiplier of year_t, year_t - 1, year_t - 2, ... and append the results as
# weighted_t_0, weighted_t_1, weighted_t_2, ... next to the original columns.
# Reshaping uses pivot_longer()/pivot_wider() instead of the superseded
# gather()/spread().
# NOTE(review): pivot_longer() is expected to return a tibble, so the result
# may print as a tibble rather than a base data.frame -- confirm if it matters.
df1 %>%
  mutate(
    # year_t is a factor: go through character to get the actual year rather
    # than the internal level code.
    year_t = as.integer(as.character(year_t)),
    # Row key used to join the reshaped results back onto df1.
    id = seq_len(n())
  ) %>%
  # One row per (id, data column); the column name lands in `time`.
  pivot_longer(
    cols = -c(year_t, id),
    names_to = "time",
    values_to = "measure"
  ) %>%
  mutate(
    # "data_t" -> "data_t0" so every name ends in its lag, then strip the
    # non-digit prefix: "data_t_2" -> 2, "data_t" -> 0.
    time = as.integer(gsub("^[^0-9]+", "", gsub("t$", "t0", time))),
    # df2 stores year as character, so the join key must be character too.
    year = as.character(year_t - time)
  ) %>%
  left_join(df2, by = "year") %>%
  mutate(
    # multiplyer is stored as character in df2.
    multiplyer = as.numeric(multiplyer),
    output = measure / multiplyer,
    time = paste0("weighted_t_", time)
  ) %>%
  select(id, time, output) %>%
  # Back to one row per id with one weighted_t_<lag> column per lag.
  pivot_wider(names_from = time, values_from = output) %>%
  # Re-attach the original columns (year_t comes back as the original factor).
  left_join(df1 %>% mutate(id = seq_len(n())), by = "id") %>%
  select(year_t:data_t_3, everything())
我不得不做很多转换类型,因为它们在表之间不匹配或者没有以最佳方式存储(例如,因子而不是年份的整数或存储为字符的乘数)。