Question

我有一个有趣的问题。我有数量未知（但至少 2 个）的巨大表。它们具有相同的结构（schema），我想从每个表中提取子集，并创建一个专门的交叉连接，以便将各个部分组合起来。

首先从2个data.table对象的简单示例开始：

# Two small example tables with the same columns A, B, C.
# sample(4) is called without a seed here, so C in DT1 and B in DT2
# are random permutations of 1:4 that differ between runs.
DT1 <- data.table(A = rep(1:2, each = 2), B = rep(1:2, 2), C = sample(4))
DT2 <- data.table(A = rep(1:2, 2), B = sample(4), C = rep(1:2, each = 2))

> DT1
A B C
1 1 2
1 2 3
2 1 1
2 2 4

> DT2
A B C
1 2 1
2 1 1
1 3 2
2 4 2

因为我事先不知道最终会有多少个表，所以我创建了一个列表，并使用 sapply 将其中的每个表传递给函数以进行子集化：

# Collect the tables in a list so one function can be applied to each of them.
tables <- list(DT1, DT2)
# Return the rows of `dt` whose column A equals `value`.
# When `dt` is a data.table, the bare `A` in the row filter is resolved
# as a column of `dt`.
foo <- function(dt, value) {
  dt[A == value, ]
}

# NOTE: sapply() tries to simplify what it collects, so the data.tables
# returned by foo do not come back as a plain list of data.tables here.
combined <- sapply(tables, foo, value = 1)

然而，combined 的结果是一堆没有结构的向量的大杂烩（mishmash）。我想要得到的是一个 data.table 列表。foo 函数返回的是 data.table，但结果在 sapply 中被打乱了。如何修改代码，才能得到与下面写法相同的结果？

# Desired result, written out by hand for the two-table case.
combined <- list(DT1[A == 1,], DT2[A == 1])

这样会得到一个 data.table 列表：

> combined
[[1]]
   A B C
1: 1 1 2
2: 1 2 3

[[2]]
   A B C
1: 1 2 1
2: 1 3 2

一如既往，先行致谢（TIA）。

Answer 1

$users = $rs['id'];

Answer 2

另一种方法是使用 dplyr 和 lapply，如下所示。在此基础上进一步扩展，可以使其更加动态。

library(dplyr)
library(data.table)
# Fix the RNG seed so the sample() calls below produce reproducible tables.
set.seed(42)

# Four example tables sharing the columns A, B, C.
DT1 <- data.table(A = rep(1:2, each = 2), B = rep(1:2, 2), C = sample(4))
DT2 <- data.table(A = rep(1:2, 2), B = sample(4), C = rep(1:2, each = 2))
DT3 <- data.table(A = rep(1:2, each = 2), B = sample(4), C = rep(1:2, times = 2))
DT4 <- data.table(A = rep(1:2, 2), B = sample(4), C = rep(1:2, each = 2))

# View the data.tables
list(DT1, DT2, DT3, DT4)
# [[1]]
#    A B C
# 1: 1 1 4
# 2: 1 2 3
# 3: 2 1 1
# 4: 2 2 2
# 
# [[2]]
#    A B C
# 1: 1 3 1
# 2: 2 2 1
# 3: 1 4 2
# 4: 2 1 2
# 
# [[3]]
#    A B C
# 1: 1 3 1
# 2: 1 4 2
# 3: 2 1 1
# 4: 2 2 2
# 
# [[4]]
#    A B C
# 1: 1 4 1
# 2: 2 1 1
# 3: 1 3 2
# 4: 2 2 2
# 

# Get a List of data.tables For A == 1
# Each table is filtered with dplyr::filter(); the condition is forwarded
# through `...` as a bare expression. The formula-based filter_() is
# deprecated, so it is not used here.
lapply(list(DT1, DT2, DT3, DT4),
       function(.data, ...) { as.data.table(dplyr::filter(.data, ...)) },
       A == 1)
# [[1]]
#    A B C
# 1: 1 1 4
# 2: 1 2 3
# 
# [[2]]
#    A B C
# 1: 1 3 1
# 2: 1 4 2
# 
# [[3]]
#    A B C
# 1: 1 3 1
# 2: 1 4 2
# 
# [[4]]
#    A B C
# 1: 1 4 1
# 2: 1 3 2
# 

# Get a List of data.tables For A == 2
# Each table is filtered with dplyr::filter(); the condition is forwarded
# through `...` as a bare expression. The formula-based filter_() is
# deprecated, so it is not used here.
lapply(list(DT1, DT2, DT3, DT4),
       function(.data, ...) { as.data.table(dplyr::filter(.data, ...)) },
       A == 2)
# [[1]]
#    A B C
# 1: 2 1 1
# 2: 2 2 2
# 
# [[2]]
#    A B C
# 1: 2 2 1
# 2: 2 1 2
# 
# [[3]]
#    A B C
# 1: 2 1 1
# 2: 2 2 2
# 
# [[4]]
#    A B C
# 1: 2 1 1
# 2: 2 2 2
#

扩展此方法以支持更复杂的过滤条件很容易。您只需在 lapply 调用中添加条件即可。

# Get a list of data.tables for A == 2 and B == 1
# Several conditions can be forwarded through `...`; dplyr::filter() keeps
# only the rows that satisfy all of them. The formula-based filter_() is
# deprecated, so it is not used here.
lapply(list(DT1, DT2, DT3, DT4),
       function(.data, ...) { as.data.table(dplyr::filter(.data, ...)) },
       A == 2, B == 1)
# [[1]]
#    A B C
# 1: 2 1 1
# 
# [[2]]
#    A B C
# 1: 2 1 2
# 
# [[3]]
#    A B C
# 1: 2 1 1
# 
# [[4]]
#    A B C
# 1: 2 1 1
#

从sapply返回data.tables列表

2 个答案: