我有数据1
BAKUNG BALATAK BARUGA BERINGIN CEMERLANG
1 5.397762 4.4421650 1.1371410 0.1915185 1.2023105
2 4.551889 1.1710558 0.6596748 2.2443573 5.5094816
3 9.290642 1.9318098 0.3717321 0.7481191 2.3554114
4 1.758246 1.6478570 1.1930006 0.7687339 1.5786976
5 2.497720 0.5127110 0.3331624 0.3225225 0.7541041
6 3.080921 0.6563498 1.8712953 1.0959114 1.1883456
和数据2
BAKUNG BALATAK CEMERLANG
1 <NA> <NA> <NA>
2 <NA> 2 <NA>
3 14 6 <NA>
4 17 <NA> 1
5 <NA> <NA> <NA>
6 <NA> <NA> <NA>
AMPANA TETE AMPIBABO
1 <NA> <NA>
2 <NA> <NA>
3 <NA> <NA>
4 <NA> <NA>
5 <NA> <NA>
6 <NA> <NA>
我想找出 data1 和 data2 中站名相同的站点(即两个数据框共有的列),并把它们的数据并排放在一起。我希望得到如下结果:
BAKUNG_data1 BAKUNG_data2 BALATAK_data1 BALATAK_data2
1 5.397762 NA 4.4421650 NA
2 4.551889 NA 1.1710558 2
3 9.290642 14 1.9318098 6
4 1.758246 17 1.6478570 NA
5 2.497720 NA 0.5127110 NA
6 3.080921 NA 0.6563498 NA
我尝试过
# merge() with `by` set to the shared column names joins on the *values* held
# in those columns: a row is kept only when its values agree in both data
# frames. With the data shown above no row of data1 has the same values as a
# row of data2 in the shared columns, so the result has zero rows.
abc <- merge(data1,data2, by = intersect(names(data1), names(data2)))
但是我一无所获,有什么办法解决吗?
答案 0 :(得分:3)
一种方法是先把两个数据框都转换为长格式,然后按站名和行号进行联接,最后再转换回宽格式。
library(dplyr)
library(tidyr)
# Reshape both tables to long form (one record per row index and column
# name), inner-join them on column name and row index so that only columns
# present in both tables survive, then spread the result back to wide form.
inner_join(
  data1 %>%
    mutate(row = row_number()) %>%
    pivot_longer(cols = -row, values_to = "data1"),
  data2 %>%
    mutate(row = row_number()) %>%
    pivot_longer(cols = -row, values_to = "data2"),
  by = c("name", "row")
) %>%
  pivot_wider(
    names_from = name,
    values_from = starts_with("data"),
    # Build names as <station>_<source>, e.g. BAKUNG_data1, instead of the
    # default <source>_<station>.
    names_glue = "{name}_{.value}",
    # Keep each station's data1/data2 columns next to each other.
    names_vary = "slowest"
  ) %>%
  # The helper row index was only needed to pair observations.
  select(-row)
答案 1 :(得分:2)
如果您不介意更改原数据框的列名,可以这样做:
# Column names that occur in both data frames.
tmp <- intersect(names(data1), names(data2))
# Tag every column with the data frame it came from. Note that this renames
# the columns of data1 and data2 themselves.
colnames(data1) <- paste0(colnames(data1), "_data1")
colnames(data2) <- paste0(colnames(data2), "_data2")
# Select the shared columns by exact name. A prefix regular expression would
# also pick up unrelated columns whose name merely starts with a shared name,
# would treat special characters in a name as regex syntax, and would match
# every column when nothing is shared.
cbind(
  data1[colnames(data1) %in% paste0(tmp, "_data1")],
  data2[colnames(data2) %in% paste0(tmp, "_data2")]
)
BAKUNG_data1 BALATAK_data1 CEMERLANG_data1 BAKUNG_data2 BALATAK_data2 CEMERLANG_data2
1 5.397762 4.4421650 1.2023105 <NA> <NA> <NA>
2 4.551889 1.1710558 5.5094816 <NA> 2 <NA>
3 9.290642 1.9318098 2.3554114 14 6 <NA>
4 1.758246 1.6478570 1.5786976 17 <NA> 1
5 2.497720 0.5127110 0.7541041 <NA> <NA> <NA>
6 3.080921 0.6563498 1.1883456 <NA> <NA> <NA>
答案 2 :(得分:1)
我给两个数据框各添加了一个“INDEX”列(行号),用它作为键来执行联接。
library(dplyr)
library(tidyverse)
# Rebuild data1 from the printed table. Supplying the table through `text =`
# lets read.table() manage its own connection, so no textConnection is left
# open afterwards. read.table() already returns a data.frame.
data1 <- read.table(text = "
BAKUNG BALATAK BARUGA BERINGIN CEMERLANG
5.397762 4.4421650 1.1371410 0.1915185 1.2023105
4.551889 1.1710558 0.6596748 2.2443573 5.5094816
9.290642 1.9318098 0.3717321 0.7481191 2.3554114
1.758246 1.6478570 1.1930006 0.7687339 1.5786976
2.497720 0.5127110 0.3331624 0.3225225 0.7541041
3.080921 0.6563498 1.8712953 1.0959114 1.1883456", header = TRUE, na.strings = "<NA>")
# INDEX stores the row position (1, 2, ...) as an explicit column.
data1$INDEX <- seq_len(nrow(data1))
# Rebuild data2 from the printed table; the literal "<NA>" cells are read as
# missing values via `na.strings`. Supplying the table through `text =` lets
# read.table() manage its own connection, so no textConnection is left open
# afterwards. read.table() already returns a data.frame.
data2 <- read.table(text = "
BAKUNG BALATAK CEMERLANG AMPANA_TETE AMPIBABO
<NA> <NA> <NA> <NA> <NA>
<NA> 2 <NA> <NA> <NA>
14 6 <NA> <NA> <NA>
17 <NA> 1 <NA> <NA>
<NA> <NA> <NA> <NA> <NA>
<NA> <NA> <NA> <NA> <NA>"), header = TRUE, na.strings = "<NA>")
# INDEX stores the row position (1, 2, ...) as an explicit column.
data2$INDEX <- seq_len(nrow(data2))
# Pair the two tables row by row through INDEX. Columns that exist in both
# tables receive the "_data1" / "_data2" suffix; all other columns keep
# their original name. The key is given once: repeating it is redundant, and
# some dplyr versions may reject a duplicated key (not verified here).
data3 <- inner_join(data1, data2, by = "INDEX", suffix = c("_data1", "_data2"))
# Keep only the suffixed (shared) columns. The pattern is anchored at the end
# of the name and case-sensitive, so an unsuffixed column whose own name
# happens to contain "data" in any letter case is not kept by accident.
data3 <- data3 %>% select(matches("_data[12]$", ignore.case = FALSE))
答案 3 :(得分:1)
Base R解决方案:
# Names of the columns that occur in both data frames.
common_cols <- intersect(names(df1), names(df2))
# Bind the shared columns side by side, tagging each with its source.
# `drop = FALSE` keeps the selection a data frame even when exactly one
# column is shared; without it the selection collapses to a bare vector and
# the renaming and binding no longer produce named data-frame columns.
df3 <- cbind(
  setNames(df1[, common_cols, drop = FALSE], paste0(common_cols, "_data1")),
  setNames(df2[, common_cols, drop = FALSE], paste0(common_cols, "_data2"))
)
# Sorting the names places each column's _data1 / _data2 pair next to each
# other; `drop = FALSE` again guards the single-column case.
df3_ordered <- df3[, sort(names(df3)), drop = FALSE]
数据:
# First example data frame: five numeric columns with six observations each.
df1 <- data.frame(
  BAKUNG = c(5.397762, 4.551889, 9.290642, 1.758246, 2.49772, 3.080921),
  BALATAK = c(4.442165, 1.1710558, 1.9318098, 1.647857, 0.512711, 0.6563498),
  BARUGA = c(1.137141, 0.6596748, 0.3717321, 1.1930006, 0.3331624, 1.8712953),
  BERINGIN = c(0.1915185, 2.2443573, 0.7481191, 0.7687339, 0.3225225, 1.0959114),
  CEMERLANG = c(1.2023105, 5.5094816, 2.3554114, 1.5786976, 0.7541041, 1.1883456),
  stringsAsFactors = FALSE
)
# Second example data frame: three character columns with six observations
# each. Missing cells are stored as the literal string "<NA>", not as NA.
df2 <- data.frame(
  BAKUNG = c("<NA>", "<NA>", "14", "17", "<NA>", "<NA>"),
  BALATAK = c("<NA>", "2", "6", "<NA>", "<NA>", "<NA>"),
  CEMERLANG = c("<NA>", "<NA>", "<NA>", "1", "<NA>", "<NA>"),
  stringsAsFactors = FALSE
)