有什么方法可以正确组合数据帧?

时间:2019-06-20 10:50:03

标签: r

在合并具有相同行(相同 x、y 坐标)的数据框时遇到了问题。我从不同的文件夹中导入了栅格,将它们转换为数据框,并尝试将它们组合在一起,但数据没有按行对齐,我也不知道为什么!请问有人知道答案吗?

#' Stack the layers read from one input
#'
#' Thin wrapper around `stack()`.
#'
#' @param f Input handed unchanged to `stack()`; in the calling code this is
#'   a single file path.
#' @return Whatever `stack(f)` returns, returned visibly.
# NOTE(review): presumably `stack` is meant to be raster::stack (the question
# imports rasters) - confirm that the raster package is attached, otherwise
# utils::stack is used.
Fun <- function(f) {
  # No intermediate assignment: assigning to an unused local made the
  # function return its value invisibly.
  stack(f)
}

# Collect every file below "mypath" whose name ends in ".tif". The dot is
# escaped so that only a literal "." matches (an unescaped "." matches any
# character, e.g. "xtif").
files <- dir("mypath", recursive = TRUE, full.names = TRUE, pattern = "\\.tif$")

# One stacked object per file. lapply() always returns a list (sapply() may
# simplify its result); the file paths are kept as element names, as
# sapply() did for a character input.
All <- stats::setNames(lapply(files, Fun), files)

# Convert every element to a data frame (xy = TRUE is forwarded to
# as.data.frame), then append all rows. This is done once: the former
# `for (i in All)` loop recomputed the identical result on every iteration.
# Note that bind_rows() matches columns by name and fills columns missing
# from an input with NA; it does not join rows on x/y.
df1 <- lapply(All, as.data.frame, xy = TRUE)
all <- dplyr::bind_rows(df1)

现在的数据如下:

  R1
        x        y    R1
1  696060 -3327450    66
2  696090 -3327450    71
3  696120 -3327450    69
4  696150 -3327450    65
5  696180 -3327450    67
6  696210 -3327450    68
7  696240 -3327450    67
8  696270 -3327450    68
9  696300 -3327450    69
10 696330 -3327450     0

R2
        x        y    R2
1  696060 -3327450    66
2  696090 -3327450    71
3  696120 -3327450    69
4  696150 -3327450    65
5  696180 -3327450    67
6  696210 -3327450    68
7  696240 -3327450    67
8  696270 -3327450    68
9  696300 -3327450    69
10 696330 -3327450     0

R1
        x        y   R1
1  753810 -3339930   109
2  753840 -3339930   108
3  753870 -3339930   108
4  753900 -3339930   109
5  753930 -3339930   108
6  753960 -3339930   109
7  753990 -3339930   109
8  754020 -3339930   109
9  754050 -3339930   110
10 754080 -3339930   109

R2
        x        y   R2
1  753810 -3339930   109
2  753840 -3339930   108
3  753870 -3339930   108
4  753900 -3339930   109
5  753930 -3339930   108
6  753960 -3339930   109
7  753990 -3339930   109
8  754020 -3339930   109
9  754050 -3339930   110
10 754080 -3339930   109


输出如下所示:
          x        y  R1    R2
1  696060 -3327450    66    NA
2  696090 -3327450    71    NA
3  696120 -3327450    69    NA
4  696150 -3327450    65    NA
5  696180 -3327450    67    NA
6  696210 -3327450    68    NA
7  696240 -3327450    67    NA
8  696270 -3327450    68    NA
9  696300 -3327450    69    NA
10 696330 -3327450     0    NA
11 696060 -3327450    NA    66
12 696090 -3327450    NA    71
13 696120 -3327450    NA    69
14 696150 -3327450    NA    65
15 696180 -3327450    NA    67
16 696210 -3327450    NA    68
17 696240 -3327450    NA    67
18 696270 -3327450    NA    68
19 696300 -3327450    NA    69
20 696330 -3327450    NA     0
21 753810 -3339930   109   109
22 753840 -3339930   108   108
23 753870 -3339930   108   108
24 753900 -3339930   109   109
25 753930 -3339930   108   108
26 753960 -3339930   109   109
27 753990 -3339930   109   109
28 754020 -3339930   109   109
29 754050 -3339930   110   110
30 754080 -3339930   109   109

有人知道这是怎么回事吗?或者能建议我该怎么做吗?

3 个答案:

答案 0 :(得分:0)

在 dplyr 中,您可以使用 inner_join、left_join 等函数(使用 by = c() 参数指定连接列)。

我建议使用它们作为链接数据的简便方法,尤其是在您熟悉SQL的情况下。

答案 1 :(得分:0)

您可以改编这个解决方案(this solution)。在合并之前为每个数据框添加一个 id 列,之后即可按该列排序得到所需的顺序。

# Tag every data frame with an `id` column before merging. Because the ids
# differ between data frames, rows from different inputs never match on
# (x, y, id), and the merged rows can be ordered by input afterwards.
# The id is the list name with its first character dropped; for an unnamed
# list, names(All) is NULL and Map() would fail on the zero-length `value`,
# so fall back to the element positions.
ids <- if (is.null(names(All))) seq_along(All) else substring(names(All), 2)
res <- Reduce(function(...) merge(..., all=TRUE),
       Map(`[<-`, All, "id", value=ids))
# Order by `id` and drop the id column by name rather than by position, so
# this also works when `id` is not the third column (e.g. a single input).
res[order(res$id), names(res) != "id"]
#         x        y  R2  R1
# 1  696060 -3327450  NA  66
# 3  696090 -3327450  NA  71
# 5  696120 -3327450  NA  69
# 7  696150 -3327450  NA  65
# 9  696180 -3327450  NA  67
# 11 696210 -3327450  NA  68
# 13 696240 -3327450  NA  67
# 15 696270 -3327450  NA  68
# 17 696300 -3327450  NA  69
# 19 696330 -3327450  NA   0
# 2  696060 -3327450  66  NA
# 4  696090 -3327450  71  NA
# 6  696120 -3327450  69  NA
# 8  696150 -3327450  65  NA
# 10 696180 -3327450  67  NA
# 12 696210 -3327450  68  NA
# 14 696240 -3327450  67  NA
# 16 696270 -3327450  68  NA
# 18 696300 -3327450  69  NA
# 20 696330 -3327450   0  NA
# 21 753810 -3339930  NA 109
# 23 753840 -3339930  NA 108
# 25 753870 -3339930  NA 108
# 27 753900 -3339930  NA 109
# 29 753930 -3339930  NA 108
# 31 753960 -3339930  NA 109
# 33 753990 -3339930  NA 109
# 35 754020 -3339930  NA 109
# 37 754050 -3339930  NA 110
# 39 754080 -3339930  NA 109
# 22 753810 -3339930 109  NA
# 24 753840 -3339930 108  NA
# 26 753870 -3339930 108  NA
# 28 753900 -3339930 109  NA
# 30 753930 -3339930 108  NA
# 32 753960 -3339930 109  NA
# 34 753990 -3339930 109  NA
# 36 754020 -3339930 109  NA
# 38 754050 -3339930 110  NA
# 40 754080 -3339930 109  NA

数据

# Example input: an unnamed list of four 10-row data frames. Elements 1-2
# share one set of x/y coordinates and elements 3-4 another; within each
# pair the value column is called R1 in the first and R2 in the second.
# All columns are integer.
All <- local({
  steps <- 30L * (0:9)  # x advances in steps of 30 within each block

  x_a <- 696060L + steps
  y_a <- rep(-3327450L, 10L)
  v_a <- c(66L, 71L, 69L, 65L, 67L, 68L, 67L, 68L, 69L, 0L)

  x_b <- 753810L + steps
  y_b <- rep(-3339930L, 10L)
  v_b <- c(109L, 108L, 108L, 109L, 108L, 109L, 109L, 109L, 110L, 109L)

  list(
    data.frame(x = x_a, y = y_a, R1 = v_a),
    data.frame(x = x_a, y = y_a, R2 = v_a),
    data.frame(x = x_b, y = y_b, R1 = v_b),
    data.frame(x = x_b, y = y_b, R2 = v_b)
  )
})

答案 2 :(得分:0)

我不确定您是否只需要处理这 4 个数据框,不过下面给出了主要思路。您可以针对上述 4 个数据框直接使用,也可以将其调整为适用于存放在列表中的数据框等情况。

数据:

# Reproducible input for the joins below. Each dput() result is assigned to
# its variable so that df1..df4 actually exist when the snippet is run; a
# bare `dput(df1)` followed by its printed output defines nothing.
# df1/df2 share one set of x/y coordinates, df3/df4 another.
df1 <- structure(list(x = c(696060L, 696090L, 696120L, 696150L, 696180L, 
696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
-3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
-3327450L, -3327450L, -3327450L), R1 = c(66L, 71L, 69L, 65L, 
67L, 68L, 67L, 68L, 69L, 0L)), class = "data.frame", row.names = c(NA, 
-10L))
df2 <- structure(list(x = c(696060L, 696090L, 696120L, 696150L, 696180L, 
696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
-3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
-3327450L, -3327450L, -3327450L), R2 = c(66L, 71L, 69L, 65L, 
67L, 68L, 67L, 68L, 69L, 0L)), class = "data.frame", row.names = c(NA, 
-10L))
df3 <- structure(list(x = c(753810L, 753840L, 753870L, 753900L, 753930L, 
753960L, 753990L, 754020L, 754050L, 754080L), y = c(-3339930L, 
-3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
-3339930L, -3339930L, -3339930L), R1 = c(109L, 108L, 108L, 109L, 
108L, 109L, 109L, 109L, 110L, 109L)), class = "data.frame", row.names = c(NA, 
-10L))
df4 <- structure(list(x = c(753810L, 753840L, 753870L, 753900L, 753930L, 
753960L, 753990L, 754020L, 754050L, 754080L), y = c(-3339930L, 
-3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
-3339930L, -3339930L, -3339930L), R2 = c(109L, 108L, 108L, 109L, 
108L, 109L, 109L, 109L, 110L, 109L)), class = "data.frame", row.names = c(NA, 
-10L))

首先使用 dplyr 的 left_join 将数据框两两合并:

# Join df1 and df2 on their shared x/y coordinates so that R1 and R2 end up
# side by side in one row per (x, y) pair.
# left_join() is namespace-qualified because dplyr is never attached in this
# snippet; `by = c("x", "y")` is the short form for identically named keys.
combined1 <- dplyr::left_join(df1, df2, by = c("x", "y"))
combined1
        x        y R1 R2
1  696060 -3327450 66 66
2  696090 -3327450 71 71
3  696120 -3327450 69 69
4  696150 -3327450 65 65
5  696180 -3327450 67 67
6  696210 -3327450 68 68
7  696240 -3327450 67 67
8  696270 -3327450 68 68
9  696300 -3327450 69 69
10 696330 -3327450  0  0
# Join df3 and df4 on their shared x/y coordinates so that R1 and R2 end up
# side by side in one row per (x, y) pair.
# left_join() is namespace-qualified because dplyr is never attached in this
# snippet; `by = c("x", "y")` is the short form for identically named keys.
combined2 <- dplyr::left_join(df3, df4, by = c("x", "y"))
combined2
        x        y  R1  R2
1  753810 -3339930 109 109
2  753840 -3339930 108 108
3  753870 -3339930 108 108
4  753900 -3339930 109 109
5  753930 -3339930 108 108
6  753960 -3339930 109 109
7  753990 -3339930 109 109
8  754020 -3339930 109 109
9  754050 -3339930 110 110
10 754080 -3339930 109 109

然后,您可以用 rbind 将它们按行合并为所需的完整数据框:

# Append the rows of the two joined tables (both have columns x, y, R1, R2)
# and display the combined result.
allCombined <- do.call(rbind, list(combined1, combined2))
print(allCombined)
        x        y  R1  R2
1  696060 -3327450  66  66
2  696090 -3327450  71  71
3  696120 -3327450  69  69
4  696150 -3327450  65  65
5  696180 -3327450  67  67
6  696210 -3327450  68  68
7  696240 -3327450  67  67
8  696270 -3327450  68  68
9  696300 -3327450  69  69
10 696330 -3327450   0   0
11 753810 -3339930 109 109
12 753840 -3339930 108 108
13 753870 -3339930 108 108
14 753900 -3339930 109 109
15 753930 -3339930 108 108
16 753960 -3339930 109 109
17 753990 -3339930 109 109
18 754020 -3339930 109 109
19 754050 -3339930 110 110
20 754080 -3339930 109 109

这有意义吗?