# Reproducible example: build a lookup table and a shuffled data set from
# mtcars, then try to flag the rows of `data` that also occur in `lookup`.
set.seed(999)
lookup <- mtcars[1:6,1:2] #lookup table
data <- mtcars[4:7, 1:2] #my actual data
data <- data[sample(nrow(data)),] #shuffle row-wise
data
# Move the car names out of the row names into a regular "id" column.
lookup <- tibble::rownames_to_column(lookup, "id")
data <- tibble::rownames_to_column(data, "id")
# Transpose both frames so that every original row becomes a column, then
# match the columns of `data` against the columns of `lookup`.
# NOTE(review): the output reported for this script is 1 for every row, even
# for an id that is absent from `lookup`, so this call does not behave as a
# row-wise exact-match test.
data$matchornot <- match(data.frame(t(data)), data.frame(t(lookup)))
#Code ends
lookup
id mpg cyl
1 Mazda RX4 21.0 6
2 Mazda RX4 Wag 21.0 6
3 Datsun 710 22.8 4
4 Hornet 4 Drive 21.4 6
5 Hornet Sportabout 18.7 8
6 Valiant 18.1 6
data
mpg cyl
1 Hornet Sportabout 18.7 8
2 Duster 360 14.3 8
3 Hornet 4 Drive 21.4 6
4 Valiant 18.1 6
My output:
id mpg cyl matchornot
1 Hornet Sportabout 18.7 8 1
2 Duster 360 14.3 8 1 #<--There's no Duster model in lookup, supposed to be 0
3 Hornet 4 Drive 21.4 6 1
4 Valiant 18.1 6 1
你好,我想对照查找表按行检查数据框:如果数据框中的某一行与查找表中的某一行完全匹配(按行比较),则标记为 1,否则标记为 0。
感谢!
*更新了v1: 抱歉不够简洁..
整行必须 100% 匹配。例如,如果 Valiant 车型的 mpg 为 99.0,则该车型对应的输出 data$matchornot 应为 "0"。
答案 0 :(得分:1)
试试这个:
# Flag each row of `data` with 1 when its id occurs in `lookup`, otherwise 0.
data$matchornot <- as.numeric(data$id %in% lookup$id)
**更新**
# Row of `lookup` that carries the same id as each row of `data` (NA when the
# id is absent; the first hit is used if an id occurs more than once).
lookup.row <- match(data$id, lookup$id)
match.id <- as.numeric(!is.na(lookup.row))
# Compare every value with the value stored in that same lookup row. Testing
# membership in the whole lookup column instead would also accept an mpg that
# belongs to a different car. `%in% TRUE` turns the NA produced for unmatched
# ids (or missing values) into 0.
match.mpg <- as.numeric((data$mpg == lookup$mpg[lookup.row]) %in% TRUE)
match.cyl <- as.numeric((data$cyl == lookup$cyl[lookup.row]) %in% TRUE)
# The product is 1 only when id, mpg and cyl all agree, i.e. the whole row.
data$matchornot <- match.id * match.mpg * match.cyl
现在,例如如果 Valiant 的 mpg 是 99.0 而不是 18.1,则 match.mpg 的相应元素将为 0,因此乘积也为 0。
请参阅下面的示例,其中 Valiant 的 mpg = 99.0:
# Demo: copy `data` and give the Valiant an mpg that no longer agrees with
# its lookup row.
data.v2 <- data
data.v2$mpg[which(data.v2$id == "Valiant")] <- 99.0
# Row of `lookup` that carries the same id as each row of `data.v2` (NA when
# the id is absent; the first hit is used if an id occurs more than once).
lookup.row <- match(data.v2$id, lookup$id)
match.id <- as.numeric(!is.na(lookup.row))
# Compare every value with the value stored in that same lookup row. Testing
# membership in the whole lookup column instead would also accept an mpg that
# belongs to a different car. `%in% TRUE` turns the NA produced for unmatched
# ids (or missing values) into 0.
match.mpg <- as.numeric((data.v2$mpg == lookup$mpg[lookup.row]) %in% TRUE)
match.cyl <- as.numeric((data.v2$cyl == lookup$cyl[lookup.row]) %in% TRUE)
# The product is 1 only when id, mpg and cyl all agree, i.e. the whole row.
data.v2$matchornot <- match.id * match.mpg * match.cyl
输出如下:
> data.v2
id mpg cyl matchornot
1 Hornet Sportabout 18.7 8 1
2 Duster 360 14.3 8 0
3 Hornet 4 Drive 21.4 6 1
4 Valiant 99.0 6 0
答案 1 :(得分:1)
以下是使用 tidyverse 中 dplyr::left_join 的解决方案:
# Attach to every row of `data` the lookup row with the same id. Columns that
# exist in both frames are suffixed .x (from data) and .y (from lookup).
left_join(data, lookup, by = "id") %>%
  # A row is a match only when a lookup partner exists and BOTH mpg and cyl
  # agree with it; `%in% TRUE` maps the NA of unmatched rows to 0.
  mutate(
    matchornot = as.numeric((mpg.x == mpg.y & cyl.x == cyl.y) %in% TRUE)
  ) %>%
  # Drop the lookup copies and restore the original column names.
  select(-mpg.y, -cyl.y) %>%
  rename(mpg = mpg.x, cyl = cyl.x)
# id mpg cyl matchornot
#1 Duster 360 14.3 8 0
#2 Valiant 18.1 6 1
#3 Hornet Sportabout 18.7 8 1
#4 Hornet 4 Drive 21.4 6 1
或使用match
# match() yields NA for ids that are missing from `lookup`; turn "found" into
# 1 and "not found" into 0, then print the result.
data$matchornot <- as.numeric(!is.na(match(data$id, lookup$id)))
data
# id mpg cyl matchornot
#1 Duster 360 14.3 8 0
#2 Valiant 18.1 6 1
#3 Hornet Sportabout 18.7 8 1
#4 Hornet 4 Drive 21.4 6 1
要匹配 data 中整行的所有条目,可以把每一行的条目拼接成一个字符串,再对拼接结果使用 match:
# Use only the columns that define a lookup row, so that columns existing in
# `data` alone (e.g. a matchornot column from an earlier step) cannot spoil
# the comparison.
key.cols <- names(lookup)
# Build one key string per row. paste() converts every value on its own,
# whereas apply() first coerces the frame to a character matrix and formats
# each numeric column to a common width/precision, so the same number can be
# rendered differently in `data` and in `lookup`. "\r" serves as separator
# because it is unlikely to occur inside a value.
row.key <- function(df) {
  do.call(paste, c(unname(as.list(df[key.cols])), sep = "\r"))
}
# 1 when the complete row occurs in `lookup`, otherwise 0.
data$matchornot <- as.numeric(!is.na(match(row.key(data), row.key(lookup))))
data
# id mpg cyl matchornot
#1 Hornet Sportabout 18.7 8 1
#2 Hornet 4 Drive 21.4 6 1
#3 Valiant 18.1 6 1
#4 Duster 360 14.3 8 0