根据条件将包含列表的两列合并为一列

时间:2019-01-01 15:06:55

标签: r list merge

我的数据框中有 x 和 y 两列,它们都是列表列(list column)。x 列中的某些元素是空值 `logical(0)`,我想用 y 列中对应的元素来填充它们。在 R 中该如何实现?(原帖此处附有一张图片。)

示例数据

# Example data: a data frame with two list-columns of coordinate matrices.
# Elements of `x` are either an empty 0 x 2 logical matrix or a 1 x 2 numeric
# matrix with columns "X"/"Y"; elements of `y` are always 1 x 2 numeric
# matrices with columns "lon"/"lat".
df <- structure(
  list(
    x = list(
      matrix(logical(0), nrow = 0L, ncol = 2L),
      matrix(
        c(72.8468555473385, 19.1207531432888),
        nrow = 1L,
        dimnames = list("1", c("X", "Y"))
      ),
      matrix(logical(0), nrow = 0L, ncol = 2L),
      matrix(
        c(72.8466089689375, 19.1222313526198),
        nrow = 1L,
        dimnames = list("1", c("X", "Y"))
      ),
      matrix(
        c(72.8458211528575, 19.1206957620104),
        nrow = 1L,
        dimnames = list("1", c("X", "Y"))
      )
    ),
    y = list(
      matrix(
        c(72.846989997634, 19.1197250026469),
        nrow = 1L,
        dimnames = list(NULL, c("lon", "lat"))
      ),
      matrix(
        c(72.846989997634, 19.1197250026469),
        nrow = 1L,
        dimnames = list(NULL, c("lon", "lat"))
      ),
      matrix(
        c(72.8480650003086, 19.1195200000195),
        nrow = 1L,
        dimnames = list(NULL, c("lon", "lat"))
      ),
      matrix(
        c(72.8463200059764, 19.1207150074423),
        nrow = 1L,
        dimnames = list(NULL, c("lon", "lat"))
      ),
      matrix(
        c(72.8468350022863, 19.1204500035408),
        nrow = 1L,
        dimnames = list(NULL, c("lon", "lat"))
      )
    )
  ),
  # The list above is already named, so only the row names (compact form for
  # five automatic row names) and the class need to be attached.
  row.names = c(NA, -5L),
  class = "data.frame"
)

我希望 x 列包含 x 与 y 合并后的值,如下所示:

 x
    1 72.84699, 19.11973
    2 72.84686, 19.12075
    3  72.84807, 19.11952
    4 72.84661, 19.12223 
    5 72.84582, 19.12070 

2 个答案:

答案 0 :(得分:3)

可能有更巧妙的方法,但使用 base R 的 mapply,我们可以检查 x 列中每个元素的 length,如果该元素为空(长度为 0),则用 y 列中对应的值替换它。

# Fill every empty element of list-column `x` with the matching element of `y`.
# Map() always returns a list, so the elements do not need to be wrapped in
# list() to survive mapply()'s default simplification. The test is
# `length(x) > 0` rather than `> 1`, so only truly empty elements such as
# logical(0) are replaced; a length-one element of `x` is kept as it is.
df$x <- Map(function(x, y) if (length(x) > 0) x else y, df$x, df$y)

df
#                   x                  y
#1 72.84699, 19.11973 72.84699, 19.11973
#2 72.84686, 19.12075 72.84699, 19.11973
#3 72.84807, 19.11952 72.84807, 19.11952
#4 72.84661, 19.12223 72.84632, 19.12072
#5 72.84582, 19.12070 72.84684, 19.12045

答案 1 :(得分:2)

我们可以在 base R 中以向量化的方式完成:先创建一个逻辑索引,再据此为 “x” 列中对应的元素赋值。

# Logical mask marking the rows whose `x` element is empty (length zero).
i1 <- lengths(df$x) == 0L
# Overwrite only those elements with their counterparts from `y`.
df[["x"]][i1] <- df[["y"]][i1]

或者写成一行

# Same fill written as a single replace() call: the elements of `x` selected
# by the logical mask `i1` are swapped for the matching elements of `y`.
df[["x"]] <- replace(x = df[["x"]], list = i1, values = df[["y"]][i1])
df
#                   x                  y
#1 72.84699, 19.11973 72.84699, 19.11973
#2 72.84686, 19.12075 72.84699, 19.11973
#3 72.84807, 19.11952 72.84807, 19.11952
#4 72.84661, 19.12223 72.84632, 19.12072
#5 72.84582, 19.12070 72.84684, 19.12045

或者使用 tidyverse

library(tidyverse)
# Keep each non-empty element of `x` and fall back to `y` where `x` is empty.
# The result is only printed here; assign it to keep the change.
df %>%
  mutate(x = ifelse(lengths(x) > 0L, x, y))

基准

一些稍大的数据集上的基准测试

# Benchmark data: the example rows repeated 1e6 times.
df1 <- df[rep(seq_len(nrow(df)), 1e6), ]
# Plain assignment is enough to get an independent second data frame, because
# R copies on modification. The original called copy(), which is not a base R
# function (it is data.table::copy) and no package providing it is loaded in
# the code shown, so the script would stop there.
df2 <- df1

# Approach 1: element-wise mapply() over both list-columns.
system.time({
  df1$x <- mapply(
    function(x, y) if (length(x) > 1) list(x) else list(y),
    df1$x, df1$y
  )
})
#user  system elapsed 
#  6.261   0.941   7.164 

# Approach 2: vectorised logical index followed by a subset assignment.
system.time({
  i1 <- !lengths(df2$x)
  df2$x[i1] <- df2$y[i1]
})
# user  system elapsed 
#  0.858   0.018   0.874