我想知道,当需要同时满足两个不同的匹配条件时,如何把一个数据框中的数据提取到另一个数据框中。
我的数据框A是
TransactionNo Date AccountNo TransactionType Amount Currency
1 1000001 2019-07-01 10001 Spend -12.44 SGD
2 1000002 2019-07-01 10001 Spend -31.92 CNY
3 1000003 2019-07-03 10001 Spend -8.08 USD
4 1000004 2019-07-04 10001 Spend -6.02 SGD
5 1000005 2019-07-05 10001 Spend -5.04 USD
6 1000006 2019-07-06 10001 Spend -8.43 SGD
我的数据框B是
Date USD CNY SGD
<dttm> <dbl> <dbl> <dbl>
1 2019-07-01 00:00:00 1.35 0.198 1
2 2019-07-02 00:00:00 1.40 0.198 1
3 2019-07-03 00:00:00 1.36 0.197 1
4 2019-07-04 00:00:00 1.37 0.197 1
5 2019-07-05 00:00:00 1.38 0.197 1
6 2019-07-06 00:00:00 1.39 0.197 1
我想在数据框A中创建一个名为Exchange的新列,该列的值取自数据框B。具体取哪个值,取决于两个数据框之间Currency和Date的匹配结果。例如,数据框A中第一行的Exchange为1,第二行为0.198,第三行为1.36。
最终的代码大致如下:
# Desired final form; XXXXXXXXXXX is a placeholder for the lookup expression
# that the question is asking for.
A <- mutate(A, Exchange = XXXXXXXXXXX)
我知道大家都有各自的事情要忙,时间非常宝贵。非常感谢各位愿意花费时间和精力,帮助像我这样的编程新手学习基础知识。
答案 0 :(得分:3)
在 base R 中,先分别为行和列创建索引,再用 `cbind` 把它们合并成索引矩阵,然后提取对应的值:
# Row index: position in B of the date matching each transaction in A.
i1 <- match(as.Date(A$Date), as.Date(B$Date))
# Column index: position of each transaction's currency among the columns of
# B that remain after dropping the first (Date) column.
j1 <- match(A$Currency, names(B)[-1])
# Each row of cbind(i1, j1) is a (row, column) pair selecting one rate.
# Convert the rate columns to a matrix explicitly: indexing with a numeric
# two-column matrix is supported by base data frames but not by tibbles, so
# this form works for either kind of B.
A$Exchange <- as.matrix(B[-1])[cbind(i1, j1)]
A$Exchange
#[1] 1.000 0.198 1.360 1.000 1.380 1.000
或者使用 tidyverse:先把数据框 `B` 从“宽”格式转换为“长”格式,然后再与 `A` 进行联接:
library(dplyr)
library(tidyr)
# Reshape B to one row per (Date, Currency) pair, then right-join onto A so
# that every transaction in A is kept and picks up its matching rate.
B %>%
  # Convert to Date so that B's date-times compare equal to A's dates.
  mutate(Date = as.Date(Date)) %>%
  pivot_longer(
    cols = -Date,
    names_to = "Currency",
    values_to = "Exchange"
  ) %>%
  # Name the join keys explicitly rather than relying on whichever column
  # names the two tables happen to share.
  right_join(
    A %>%
      mutate(Date = as.Date(Date)),
    by = c("Date", "Currency")
  )
# A tibble: 6 x 7
#  Date       Currency Exchange TransactionNo AccountNo TransactionType Amount
#  <date>     <chr>       <dbl>         <int>     <int> <chr>            <dbl>
#1 2019-07-01 SGD         1           1000001     10001 Spend           -12.4 
#2 2019-07-01 CNY         0.198       1000002     10001 Spend           -31.9 
#3 2019-07-03 USD         1.36        1000003     10001 Spend            -8.08
#4 2019-07-04 SGD         1           1000004     10001 Spend            -6.02
#5 2019-07-05 USD         1.38        1000005     10001 Spend            -5.04
#6 2019-07-06 SGD         1           1000006     10001 Spend            -8.43
# Sample transactions table A for the example above; Date is stored as
# character here.
A <- structure(
  list(
    TransactionNo = 1000001:1000006,
    Date = c(
      "2019-07-01", "2019-07-01", "2019-07-03",
      "2019-07-04", "2019-07-05", "2019-07-06"
    ),
    AccountNo = rep(10001L, 6L),
    TransactionType = rep("Spend", 6L),
    Amount = c(-12.44, -31.92, -8.08, -6.02, -5.04, -8.43),
    Currency = c("SGD", "CNY", "USD", "SGD", "USD", "SGD")
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
# Sample exchange-rate table B for the example above: one row per date and
# one column per currency; Date is stored as character here.
B <- structure(
  list(
    Date = c(
      "2019-07-01 00:00:00", "2019-07-02 00:00:00", "2019-07-03 00:00:00",
      "2019-07-04 00:00:00", "2019-07-05 00:00:00", "2019-07-06 00:00:00"
    ),
    USD = c(1.35, 1.4, 1.36, 1.37, 1.38, 1.39),
    CNY = c(0.198, 0.198, 0.197, 0.197, 0.197, 0.197),
    SGD = rep(1L, 6L)
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
答案 1 :(得分:1)
效率不如 Akrun 的方案,但这里给出另一种 base R 解法:
# Reshape B from wide to long: every column other than Date is stacked into a
# single Exchange column, with the source column name recorded in Currency.
# Wrapping the result in data.frame(..., row.names = NULL) resets the row
# names that reshape() generates.
long_B <- data.frame(
  reshape(
    data = B,
    varying = names(B)[names(B) != "Date"],
    times = names(B)[names(B) != "Date"],
    timevar = "Currency",
    v.names = "Exchange",
    idvar = "Date",
    direction = "long"
  ),
  row.names = NULL
)
# Left join: keep every row of A (all.x = TRUE) and attach the matching
# columns of long_B, keyed on the column names the two tables share.
a_left_join_b <- merge(
  x = A,
  y = long_B,
  by = intersect(names(A), names(long_B)),
  all.x = TRUE
)
替代方法(使用 tidyverse):
# Reshape B to long form (one row per Date/Currency pair), then right-join
# onto A so that every row of A is kept.
ab <-
  B %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = -Date,
    names_to = "Currency",
    values_to = "Exchange"
  ) %>%
  # A right join already keeps all rows of A. all.y is an argument of base
  # merge(), not of dplyr joins, so it is not passed here.
  right_join(A, by = intersect(colnames(.), colnames(A)))
# Sample transactions table A for this answer; Date is parsed to POSIXct in
# the time zone reported by Sys.timezone().
A <- structure(
  list(
    TransactionNo = 1000001:1000006,
    Date = as.POSIXct(
      c(
        "2019-07-01", "2019-07-01", "2019-07-03",
        "2019-07-04", "2019-07-05", "2019-07-06"
      ),
      tz = Sys.timezone()
    ),
    AccountNo = rep(10001L, 6L),
    TransactionType = rep("Spend", 6L),
    Amount = c(-12.44, -31.92, -8.08, -6.02, -5.04, -8.43),
    Currency = c("SGD", "CNY", "USD", "SGD", "USD", "SGD")
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
# Sample exchange-rate table B for this answer: one row per date and one
# column per currency; Date is parsed to POSIXct in the time zone reported by
# Sys.timezone().
B <- structure(
  list(
    Date = as.POSIXct(
      c(
        "2019-07-01 00:00:00", "2019-07-02 00:00:00", "2019-07-03 00:00:00",
        "2019-07-04 00:00:00", "2019-07-05 00:00:00", "2019-07-06 00:00:00"
      ),
      tz = Sys.timezone()
    ),
    USD = c(1.35, 1.4, 1.36, 1.37, 1.38, 1.39),
    CNY = c(0.198, 0.198, 0.197, 0.197, 0.197, 0.197),
    SGD = rep(1L, 6L)
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)