library(tidyverse)
下面提供的代码会创建三个数据框:Main、LookUp 和 Final。我想利用 Main 和 LookUp 这两个数据框来生成 Final 数据框。
例如,Final 表只保留 Main 中 Title 与 LookUp 表的 Title_Lookup 相同、且 “Section” 包含 Section_Lookup 中对应数字的那些行,同时保留相应的 “Title” 变量。
我希望尽可能多地使用 tidyverse。我的大多数尝试都类似下面的代码。我认为使用两层循环或 purrr 可以让我同时遍历 Main 和 LookUp 表。这比我平时做的要复杂,所以希望能得到一些关于如何处理这种情况的帮助。
# Asker's sketch of the intended approach (pseudo-code, not runnable R): nest
# two map() calls over Main and LookUp and keep entries whose Title1 equals
# Title_Lookup and whose Section1 contains Section_Lookup. The second condition
# is a placeholder written in words, not real syntax.
New<-map(Main, function(x) {
map(LookUp, function(y) if_else(x$Title1==y$Title_Lookup & ...x$Section1 CONTAINS Y SECTION_LOOKUP... ) )}),
示例代码如下:
主数据框:
# Main data frame: one row per (title, section) record; rows may repeat.
# Values are laid out one title group per line (or pair of lines).
Title1 <- c(
  "101A", "101A", "101A", "101A", "101A", "101A",
  "203S", "203S", "203S", "203S", "203S", "203S", "203S",
  "203S", "203S", "203S", "203S", "203S", "203S",
  "400B", "400B", "400B", "400B",
  "200A", "200A",
  "250D", "250D", "250D", "250D"
)
Section1 <- c(
  "2A", "2A", "2B", "2B", "2B", "2C",
  "2A", "2A", "4A", "4A", "4A", "4B", "4B",
  "4C", "4C", "4C", "4C", "4D", "4D",
  "2A", "2A", "2B", "2B",
  "2A", "6A",
  "1A", "1B", "2A", "2A"
)
# tibble() replaces data_frame(), which is deprecated in the tibble package.
Main <- tibble(Title1, Section1)
LookUp表:
# Lookup table: each row pairs a title with a numeric section prefix to keep.
Title_Lookup <- c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup <- c(2, 2, 4, 2, 2, 6, 2)
# tibble() replaces data_frame(), which is deprecated in the tibble package.
LookUp <- tibble(Title_Lookup, Section_Lookup)
最终数据框:
# Expected result: the distinct (title, section) pairs the asker wants to obtain.
Section_Final <- c(
  "2A", "2B", "2C",
  "2A", "4A", "4B", "4C", "4D",
  "2A", "2B",
  "2A", "6A",
  "2A"
)
Title_Final <- c(
  "101A", "101A", "101A",
  "203S", "203S", "203S", "203S", "203S",
  "400B", "400B",
  "200A", "200A",
  "250D"
)
# tibble() replaces data_frame(), which is deprecated in the tibble package.
Final <- tibble(Title_Final, Section_Final)
答案 0 :(得分:1)
使用 tidyverse 的解决方案。str_replace() 函数来自 stringr,它是 tidyverse 的一部分。如果您只想加载 dplyr 包,则可以使用 sub("\\D+$", "", Section1) 代替 str_replace()。
library(tidyverse)
# Build the filtered table:
#   1. derive a numeric key from Section1 by removing its trailing non-digits,
#   2. keep only rows whose (Title1, Number) pair exists in LookUp,
#   3. reduce to the distinct, renamed output columns.
Main2 <- Main %>%
  mutate(Number = as.numeric(str_replace(Section1, "\\D+$", ""))) %>%
  semi_join(
    LookUp,
    by = c("Title1" = "Title_Lookup", "Number" = "Section_Lookup")
  ) %>%
  # distinct() with named expressions keeps just these two columns.
  distinct(Title_Final = Title1, Section_Final = Section1)
print(Main2)
# # A tibble: 13 x 2
# Title_Final Section_Final
# <chr> <chr>
# 1 101A 2A
# 2 101A 2B
# 3 101A 2C
# 4 203S 2A
# 5 203S 4A
# 6 203S 4B
# 7 203S 4C
# 8 203S 4D
# 9 400B 2A
# 10 400B 2B
# 11 200A 2A
# 12 200A 6A
# 13 250D 2A
答案 1 :(得分:0)
这是一个基于 sqldf 包的解决方案,利用 charindex() 检查 Section_Lookup 中的字符串是否出现在 Section1 中。
library(tidyverse)
# sqldf() is called below, so the package must be attached explicitly.
library(sqldf)

# Main data frame: one row per (title, section) record; rows may repeat.
Title1 <- c(
  "101A", "101A", "101A", "101A", "101A", "101A",
  "203S", "203S", "203S", "203S", "203S", "203S", "203S",
  "203S", "203S", "203S", "203S", "203S", "203S",
  "400B", "400B", "400B", "400B",
  "200A", "200A",
  "250D", "250D", "250D", "250D"
)
Section1 <- c(
  "2A", "2A", "2B", "2B", "2B", "2C",
  "2A", "2A", "4A", "4A", "4A", "4B", "4B",
  "4C", "4C", "4C", "4C", "4D", "4D",
  "2A", "2A", "2B", "2B",
  "2A", "6A",
  "1A", "1B", "2A", "2A"
)
# tibble() replaces data_frame(), which is deprecated in the tibble package.
Main <- tibble(Title1, Section1)

# Lookup table. Section numbers are stored as character because the query
# searches for them as substrings of Section1.
Title_Lookup <- c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup <- as.character(c(2, 2, 4, 2, 2, 6, 2))
LookUp <- tibble(Title_Lookup, Section_Lookup)

# Distinct (Title, Section) pairs from Main whose title matches a lookup row
# and whose Section1 contains that row's Section_Lookup text. The WHERE clause
# requires a matching lookup row, so unmatched Main rows are dropped even
# though the join is written as a left join.
sqlQuery <- "select distinct a.Title1 as Title, a.Section1 as Section
from Main as a
left join LookUp as b
where
a.Title1 = b.Title_Lookup and
charindex(b.Section_Lookup,a.Section1) > 0"
sqldf(sqlQuery)
...和输出。
> sqldf(sqlQuery)
Title Section
1 101A 2A
2 101A 2B
3 101A 2C
4 203S 2A
5 203S 4A
6 203S 4B
7 203S 4C
8 203S 4D
9 400B 2A
10 400B 2B
11 200A 2A
12 200A 6A
13 250D 2A
>
答案 2 :(得分:0)
另一种方法是只基于 Section 列进行筛选。
library(dplyr)
# Sample data: 29 records with a name, a numeric code, a title and a section.
Name1 <- paste0("Name", seq_len(29))
Code <- c(
  10123, 13432, 34554, 45563, 43666, 54444, 55322, 52111, 33443, 88998, 54554,
  33455, 65889, 88888, 22344, 54455, 66655, 22222, 65564, 77677, 65545, 67765,
  34334, 88789, 76776, 67765, 55555, 65445, 65665
)
Title1 <- c(
  "101A", "101A", "101A", "101A", "101A", "101A",
  "203S", "203S", "203S", "203S", "203S", "203S", "203S",
  "203S", "203S", "203S", "203S", "203S", "203S",
  "400B", "400B", "400B", "400B",
  "200A", "200A",
  "250D", "250D", "250D", "250D"
)
Section1 <- c(
  "2A", "2A", "2B", "2B", "2B", "2C",
  "2A", "2A", "4A", "4A", "4A", "4B", "4B",
  "4C", "4C", "4C", "4C", "4D", "4D",
  "2A", "2A", "2B", "2B",
  "2A", "6A",
  "1A", "1B", "2A", "2A"
)
# tibble() replaces data_frame(), which is deprecated in the tibble package.
Main <- tibble(Name1, Code, Title1, Section1)

Title_Lookup <- c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup <- c(2, 2, 4, 2, 2, 6, 2)
LookUp <- tibble(Title_Lookup, Section_Lookup)

# Create a data.frame of the distinct section numbers present in LookUp.
df_sections <- distinct(LookUp, Section_Lookup) %>% as.data.frame()

# Keep the records whose leading section number appears in df_sections, then
# reduce to distinct (Title1, Section1) pairs. Title is not part of the match:
# this approach filters on the section number only.
# The original referenced `df`, which is not defined here (it resolves to the
# stats::df function and fails); the lookup object is `df_sections`.
# The pattern captures all leading digits so multi-digit sections parse fully.
Main %>%
  filter(
    as.numeric(sub("^([0-9]+).*", "\\1", Section1)) %in%
      df_sections$Section_Lookup
  ) %>%
  select(Title1, Section1) %>%
  distinct()
#The result:
# A tibble: 13 x 2
Title1 Section1
<chr> <chr>
1 101A 2A
2 101A 2B
3 101A 2C
4 203S 2A
5 203S 4A
6 203S 4B
7 203S 4C
8 203S 4D
9 400B 2A
10 400B 2B
11 200A 2A
12 200A 6A
13 250D 2A