我有一个相当大的数据框,其中变量以年度本地货币计价(在下面的示例中,澳大利亚和奥地利货币):
Country Var _1995 _1996 _1997 _1998
AUS GO 1 014 828 1 059 326 1 119 101 1 194 995
AUS L 36 873 38 895 39 502 40 425
AUS K 41 498 45 008 48 683 47 252
AUT GO 289 923 299 487 309 734 323 273
AUT GO 8 032 7 849 8 049 7 815
AUT L 1 094 1 151 1 163 1 152
AUT K 12 032 11 760 11 743 11 611
我想使用下面这些乘数,将此数据框中的值转换为1995年美元:
Country _1995 _1996 _1997 _1998
AUS 0,7415 0,78295 0,74406 0,6294
AUT 1,36646 1,30031 1,12904 1,11319
因此,对于表1中 Country 为 AUS 的每一行,每年的值都应乘以表2中 AUS 行对应年份的1995年美元乘数。Country 为 AUT 的每一行同理,我的数据框中其余38个国家/地区代码也是如此。
所以,在第一行中,我希望R执行此计算:
Country Var _1995 _1996 _1997 _1998
AUS GO 1014828*0,7415 1059326*0,78295 1119101*0,74406 1194995*0,6294
等等。这可行吗?非常感谢!
答案 0 :(得分:2)
我建议先把数据从宽格式重塑为长格式,这会大大简化整个过程;重塑本身是其中最复杂的一步。这里我用示例数据和 reshape 命令来演示,但您也可以使用 dplyr、reshape2 或其他任何工具。
基本思路是:把两个数据集都重塑为长格式,合并后执行乘法(在长格式下只是简单的向量乘法),最后再重塑回宽格式。
以下是示例数据(与您的类似):
# Reproducible sample data: one row per Country/Var pair and one column of
# random values per year (1996-1998).
set.seed(1)
dat <- data.frame(
  Country = rep(c("AUS", "AUT"), each = 3),
  Var = rep(c("GO", "L", "K"), times = 2),
  v_1996 = rnorm(6),
  v_1997 = rnorm(6),
  v_1998 = rnorm(6),
  stringsAsFactors = FALSE
)

# Conversion multipliers: one row per country, one column per year (1995-1998).
multipliers <- data.frame(
  Country = c("AUS", "AUT"),
  v_1995 = c(0.7415, 1.36646),
  v_1996 = c(0.78295, 1.30031),
  v_1997 = c(0.74406, 1.12904),
  v_1998 = c(0.6294, 1.11319),
  stringsAsFactors = FALSE
)
以下是进行转换的代码:
# Step 1: stack the yearly value columns of `dat` into a single "Value"
# column, with the year recorded in "time".
long <- reshape(
  dat,
  direction = "long",
  varying = paste0("v_", 1996:1998),
  v.names = "Value",
  times = 1996:1998
)
head(long, 3)
# Country Var time Value id
# 1.1996 AUS GO 1996 -0.6264538 1
# 2.1996 AUS L 1996 0.1836433 2
# 3.1996 AUS K 1996 -0.8356286 3
# 4.1996 AUT GO 1996 1.5952808 4

# Step 2: do the same for the multipliers, giving one "mult" per country/year.
mlong <- reshape(
  multipliers,
  direction = "long",
  varying = paste0("v_", 1995:1998),
  v.names = "mult",
  times = 1995:1998
)
head(mlong, 3)
# Country time mult id
# 1.1995 AUS 1995 0.74150 1
# 2.1995 AUT 1995 1.36646 2
# 1.1996 AUS 1996 0.78295 1

# Step 3: pair every value with the multiplier of the same country and year,
# then convert with a plain vector multiplication.
merged <- merge(long, mlong, by = c("Country", "time"))
merged$converted <- merged$Value * merged$mult
head(merged, 3)
# Country time Var Value id.x mult id.y converted
# 1 AUS 1996 GO -0.6264538 1 0.78295 1 -0.4904820
# 2 AUS 1996 L 0.1836433 2 0.78295 1 0.1437835
# 3 AUS 1996 K -0.8356286 3 0.78295 1 -0.6542554

# Step 4: back to wide format, dropping the helper columns.
reshape(
  merged,
  direction = "wide",
  idvar = c("Country", "Var"),
  drop = c("id.x", "id.y", "mult")
)
# Country Var Value.1996 converted.1996 Value.1997 converted.1997 Value.1998 converted.1998
# 1 AUS GO -0.6264538 -0.4904820 0.4874291 0.3626765 -0.62124058 -0.39100882
# 2 AUS L 0.1836433 0.1437835 0.7383247 0.5493579 -2.21469989 -1.39393211
# 3 AUS K -0.8356286 -0.6542554 0.5757814 0.4284159 1.12493092 0.70803152
# 10 AUT GO 1.5952808 2.0743596 -0.3053884 -0.3447957 -0.04493361 -0.05001964
# 11 AUT L 0.3295078 0.4284623 1.5117812 1.7068614 -0.01619026 -0.01802284
# 12 AUT K -0.8204684 -1.0668632 0.3898432 0.4401486 0.94383621 1.05066903
答案 1 :(得分:1)
这样的事情:
(假设您的本地货币数据框名为 local,含乘数的数据框名为 conv。)
# Convert each country's yearly values in `local` with that country's row of
# rates in `conv`. Columns 1-2 of `local` are identifiers; column 1 of `conv`
# is Country; the remaining columns are the years, in the same order.

# Compare countries as character: comparing factors whose level sets differ
# gives very strange results.
local$Country <- as.character(local$Country)
conv$Country <- as.character(conv$Country)

for (cy in unique(local$Country)) {
  in_country <- local$Country == cy
  # One multiplier per year column, as a plain numeric vector.
  rates <- unlist(conv[conv$Country == cy, -1])
  if (length(rates) != ncol(local) - 2L) {
    stop("Expected exactly one row of ", ncol(local) - 2L,
         " rates in `conv` for country: ", cy, call. = FALSE)
  }
  # sweep() multiplies year column j by rates[j]. A bare `*` would recycle
  # the rates down the rows instead of across the year columns.
  local[in_country, -c(1, 2)] <-
    sweep(local[in_country, -c(1, 2), drop = FALSE], 2, rates, `*`)
}
答案 2 :(得分:1)
创建一个小的辅助函数,然后用管道传递数据,可能是最简单的做法。为了让索引更清晰,先将 conversions 的 row.names 设置为国家/地区代码,然后删除 Country 列。
# Example data from the question. read.table() turns the "_1995"-style
# headers into X_1995, ... because they are not syntactically valid names.
df <- read.table(text = '
Country Var _1995 _1996 _1997 _1998
AUS GO 1014828 1059326 1119101 1194995
AUS L 36873 38895 39502 40425
AUS K 41498 45008 48683 47252
AUT GO 289923 299487 309734 323273
AUT GO 8032 7849 8049 7815
AUT L 1094 1151 1163 1152
AUT K 12032 11760 11743 11611
', header = TRUE)

# Multipliers: one row per country, one column per year.
conversions <- read.table(text = '
Country _1995 _1996 _1997 _1998
AUS 0.7415 0.78295 0.74406 0.6294
AUT 1.36646 1.30031 1.12904 1.11319
', header = TRUE)

# the primary code to use
# Key the multipliers by country code so rows can be looked up by name,
# then drop the now-redundant Country column.
rownames(conversions) <- conversions$Country
conversions <- conversions[-1]
#' Convert yearly values with per-country multipliers.
#'
#' @param df1 Data frame with a `Country` column; the yearly value columns
#'   start at position 3.
#' @param df2 Data frame of multipliers whose row names are country codes and
#'   whose columns are the years, in the same order as the value columns of
#'   `df1`.
#' @return `df1` with each value column multiplied by the matching rate.
myfun <- function(df1, df2) {
  # Value columns follow the two identifier columns, one per rate column.
  value_cols <- seq_len(ncol(df2)) + 2L
  # Look rates up by country *name*. Indexing with a factor would use its
  # integer codes and silently pick rows by position instead.
  rates <- df2[as.character(df1$Country), , drop = FALSE]
  df1[, value_cols] <- df1[, value_cols] * rates
  df1
}
library(dplyr)
# Run myfun() once per Country group and combine the converted pieces;
# `.` inside do() stands for the rows of the current group.
df %>%
group_by(Country) %>%
do(myfun(., conversions))
Source: local data frame [7 x 6]
Groups: Country
Country Var X_1995 X_1996 X_1997 X_1998
1 AUS GO 752494.962 829399.292 832678.290 752129.853
2 AUS L 27341.330 30452.840 29391.858 25443.495
3 AUS K 30770.767 35239.014 36223.073 29740.409
4 AUT GO 396168.183 389425.941 349702.075 359864.271
5 AUT GO 10975.407 10206.133 9087.643 8699.580
6 AUT L 1494.907 1496.657 1313.074 1282.395
7 AUT K 16441.247 15291.646 13258.317 12925.249
答案 3 :(得分:1)
以下是我使用 dplyr 的尝试。我试验了多种方法,最后得出了这个方案。我首先按 Country 将数据(即 mydf)拆分成列表。对于列表中的每个数据框,我想应用相应的汇率。因此,我用 Country 对汇率数据(即 rate)取子集,并据此生成新数据。(代码运行时,R 会为每个国家/地区提取对应的汇率。)我借用了自己在 this question 中的回答,以便使用 mutate_each() 一次计算多个列。最后,我使用 bind_rows() 合并所有数据框。
# For each country, multiply every year column by that country's rate for the
# same year, then stack the per-country pieces back together.
# across() + cur_column() stand in for mutate_each()/funs(), which dplyr has
# deprecated.
lapply(split(mydf, mydf$Country), function(i) {
  # Compare as character so the lookup does not depend on `mydf` and `rate`
  # sharing identical factor levels.
  foo <- rate[as.character(rate$Country) == as.character(unique(i$Country)), ]
  # cur_column() is the name of the column being processed, so each year
  # column is multiplied by the rate column of the same name.
  mutate(i, across(y_1995:y_1998, ~ .x * foo[[cur_column()]]))
}) %>%
  bind_rows()
# Country Var y_1995 y_1996 y_1997 y_1998
#1 AUS GO 752494.962 829399.292 832678.290 752129.853
#2 AUS L 27341.330 30452.840 29391.858 25443.495
#3 AUS K 30770.767 35239.014 36223.073 29740.409
#4 AUT GO 396168.183 389425.941 349702.075 359864.271
#5 AUT GO 10975.407 10206.133 9087.643 8699.580
#6 AUT L 1494.907 1496.657 1313.074 1282.395
#7 AUT K 16441.247 15291.646 13258.317 12925.249
DATA
# Local-currency values from the question: Country and Var are factors,
# followed by one numeric column per year.
mydf <- data.frame(
  Country = factor(rep(c("AUS", "AUT"), times = c(3L, 4L)),
                   levels = c("AUS", "AUT")),
  Var = factor(c("GO", "L", "K", "GO", "GO", "L", "K"),
               levels = c("GO", "K", "L")),
  y_1995 = c(1014828, 36873, 41498, 289923, 8032, 1094, 12032),
  y_1996 = c(1059326, 38895, 45008, 299487, 7849, 1151, 11760),
  y_1997 = c(1119101, 39502, 48683, 309734, 8049, 1163, 11743),
  y_1998 = c(1194995, 40425, 47252, 323273, 7815, 1152, 11611)
)
# Country Var y_1995 y_1996 y_1997 y_1998
#1 AUS GO 1014828 1059326 1119101 1194995
#2 AUS L 36873 38895 39502 40425
#3 AUS K 41498 45008 48683 47252
#4 AUT GO 289923 299487 309734 323273
#5 AUT GO 8032 7849 8049 7815
#6 AUT L 1094 1151 1163 1152
#7 AUT K 12032 11760 11743 11611
# Conversion multipliers: Country is a factor, followed by one numeric rate
# column per year.
rate <- data.frame(
  Country = factor(c("AUS", "AUT"), levels = c("AUS", "AUT")),
  y_1995 = c(0.7415, 1.36646),
  y_1996 = c(0.78295, 1.30031),
  y_1997 = c(0.74406, 1.12904),
  y_1998 = c(0.6294, 1.11319)
)
# Country y_1995 y_1996 y_1997 y_1998
#1 AUS 0.74150 0.78295 0.74406 0.62940
#2 AUT 1.36646 1.30031 1.12904 1.11319