# Sample inputs from the question.
# df1: the rows to look up -- each has an MLID, a Position label and a Weight.
df1 <- data.frame(
  MLID = rep(c("992", "BJR"), times = c(2, 1)),
  Position = c("N0", "N1", "N1"),
  Weight = c(0.125, 0.58, 0.69)
)
# df2: the lookup table -- for each MLID, four Weight values, each with one
# value in column N0 and one in column N1.
df2 <- data.frame(
  MLID = rep(c("992", "BJR"), each = 4),
  Weight = rep(c(0, 0.251, 0.501, 1.001), times = 2),
  N0 = c(2.80, rep(4.05, times = 7)),
  N1 = c(3.47, 4.73, 4.95, 5.15, 4.73, 7.73, 4.95, 5.15)
)
我要合并的两个表遵循规则:
因此最终输出应为:
MLID Position Weight Charge
992 N0 0.125 2.8
992 N1 0.580 4.95
BJR N1 0.690 4.95
在R中有可能吗?尤其是在dplyr软件包中?
答案 0 :(得分:4)
可以使用 data.table 的滚动连接(rolling join)来实现。首先需要用 melt 将 df2 转换为长格式(long format),然后再将 df1 与 df2 连接起来。
library(data.table)

# Rolling join: for each row of df1, look up the Charge in df2 that has the
# same MLID and Position and whose Weight is nearest to df1's Weight.

# setDT() converts df1 to a data.table by reference; the key sorts it by
# MLID, Position, Weight.
setDT(df1, key = c("MLID", "Position", "Weight"))

# Convert df2 to a data.table *before* melting so data.table's own melt
# method is used. Passing a plain data.frame to melt() relies on a
# redirection to the reshape2 package that data.table has deprecated.
setDT(df2)
df2 <- melt(df2, id.vars = c("MLID", "Weight"), variable.name = "Position",
            value.name = "Charge")
# Weight is the last key column, so it is the column the join rolls on.
setkeyv(df2, c("MLID", "Position", "Weight"))

df2[df1, roll = "nearest"]
#    MLID Weight Position Charge
# 1:  992  0.580       N1   4.95
# 2:  992  0.125       NO   2.80
# 3:  BJR  0.690       N1   4.95
选项2:基于 tidyverse 的方法可以是:
library(tidyverse)

# For each row of df1, take the Charge of the largest df2 Weight breakpoint
# that does not exceed the row's Weight, within the same MLID and Position.
df2 %>%
  # Long format: one row per (MLID, Weight breakpoint, Position).
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = -c(MLID, Weight),
    names_to = "Position",
    values_to = "Charge"
  ) %>%
  # Both tables have a Weight column, so after the join df2's breakpoint is
  # Weight.x and df1's observed weight is Weight.y.
  right_join(df1, by = c("MLID", "Position")) %>%
  filter(Weight.x <= Weight.y) %>%
  # Weight.y is part of the group so that two df1 rows sharing the same
  # MLID and Position but different weights each keep their own match.
  group_by(MLID, Position, Weight.y) %>%
  # Smallest gap first, i.e. the closest breakpoint at or below the weight.
  arrange(Weight.y - Weight.x) %>%
  slice(1) %>%
  # Drop the grouping so later verbs operate on the whole table.
  ungroup() %>%
  select(MLID, Weight = Weight.y, Position, Charge)
# # A tibble: 3 x 4
#   MLID  Weight Position Charge
#   <chr>  <dbl> <chr>     <dbl>
# 1 992    0.580 N1         4.95
# 2 992    0.125 NO         2.80
# 3 BJR    0.690 N1         4.95
数据:
为了避免不必要的警告,对 OP 的数据做了少量修改:在 data.frame 调用中加入了 stringsAsFactors = FALSE 参数。
# Sample data used by the answer; stringsAsFactors = FALSE keeps the text
# columns as character vectors.
# df1: the rows to look up -- MLID, Position label and Weight.
df1 <- data.frame(
  MLID = rep(c("992", "BJR"), times = c(2, 1)),
  Position = c("NO", "N1", "N1"),
  Weight = c(0.125, 0.58, 0.69),
  stringsAsFactors = FALSE
)
# df2: the lookup table -- for each MLID, four Weight values, each with one
# value in column NO and one in column N1.
df2 <- data.frame(
  MLID = rep(c("992", "BJR"), each = 4),
  Weight = rep(c(0, 0.251, 0.501, 1.001), times = 2),
  NO = c(2.80, rep(4.05, times = 7)),
  N1 = c(3.47, 4.73, 4.95, 5.15, 4.73, 7.73, 4.95, 5.15),
  stringsAsFactors = FALSE
)
答案 1 :(得分:2)
我们可以使用 data.table 的非等值连接(non-equi join)。先用 melt 将第二个数据集重塑为长格式('long'),再按 "MLID"、"Position" 列以及对 "Weight" 列的不等式比较与第一个数据集连接,并把最后一个匹配到的 "Charge" 值赋给 df1 中新建的列。
library(data.table)

# Reshape the lookup table to long format: one row per
# (MLID, Weight breakpoint, Position). setDT() converts df2 by reference.
# `measure.vars` is spelled out rather than relying on partial matching
# of `measure`.
lookup <- melt(setDT(df2), measure.vars = c("NO", "N1"),
               variable.name = "Position", value.name = "Charge")
# Rename the breakpoint column so it cannot be confused with df1's own
# Weight inside the join condition.
setnames(lookup, "Weight", "wt")

# Non-equi update join: add a Charge column to df1 by reference, matching
# on equal MLID and Position and on df1's Weight being greater than the
# breakpoint. `i.Charge` names the lookup table's column explicitly, so the
# assignment stays correct even if df1 already has a Charge column.
# NOTE(review): when several breakpoints match one df1 row, the result
# appears to rely on the last matching lookup row being the one kept, i.e.
# on the lookup being ordered by increasing wt -- confirm for other data.
setDT(df1)[lookup, Charge := i.Charge,
           on = .(MLID, Position, Weight > wt), mult = "last"]
df1
#   MLID Position Weight Charge
#1:  992       NO  0.125   2.80
#2:  992       N1  0.580   4.95
#3:  BJR       N1  0.690   4.95
答案 2 :(得分:1)
这是 base R 版本:
# Nearest-breakpoint lookup using only base R.

# Pair every df1 row with every df2 row of the same MLID. The clashing
# Weight columns become Weight.x (from df1) and Weight.y (from df2).
outdf <- merge(df1, df2, by = "MLID")
outdf$dist <- abs(outdf$Weight.x - outdf$Weight.y)

# Smallest distance for each df1 row. Weight.x is part of the grouping so
# that two df1 rows sharing the same MLID and Position but different
# weights are not collapsed into one.
ting <- aggregate(dist ~ MLID + Position + Weight.x, data = outdf, FUN = min)

# Keep only the pairings that attain that minimum distance.
outdf2 <- merge(outdf, ting, by = c("MLID", "Position", "Weight.x", "dist"))

# Pick the charge from the df2 column named by each row's Position. Matrix
# indexing with (row, column) pairs handles any number of Position columns
# instead of hard-coding two of them.
positions <- unique(as.character(outdf2$Position))
charges <- as.matrix(outdf2[, positions, drop = FALSE])
outdf2$charge <- charges[cbind(
  seq_len(nrow(outdf2)),
  match(as.character(outdf2$Position), positions)
)]

outdf2 <- outdf2[, c("MLID", "Position", "Weight.x", "charge")]
outdf2
#   MLID Position Weight.x charge
# 1  992       N1    0.580   4.95
# 2  992       NO    0.125   2.80
# 3  BJR       N1    0.690   4.95