我有一个data.frames列表,这些列表已按类别随机分层以获取原始数据集的70%。我想反加入data.frames列表,以获得由剩余的30%数据组成的data.frames单独列表。
require(splitstackshape)
n <- 10
heads <- "bc_title4"
train_split <- function(x) {
listofdfs <- list()
for(i in 1:n){
df <- stratified(x, 1, 0.7)
listofdfs[[i]] <- df
}
return(listofdfs)
}
train_list <- train_split(survey_points) #returns data.frame list to environment
test_split <- function(x) {
listofdfs2 <- list()
for(i in train_list){
df <- x[!x$id %in% data.frame(train_list[i])$id,]
listofdfs2[[i]] <- df
}
return(listofdfs2)
}
test_list <- test_split(survey_points)
但是由于出现以下错误,我不知道如何为反联接编写函数:
Error in train_list[i] : invalid subscript type 'list'
6.
data.frame(train_list[i])
5.
x$id %in% data.frame(train_list[i])$id
4.
x$id %in% data.frame(train_list[i])$id
3.
`[.data.frame`(x, !x$id %in% data.frame(train_list[i])$id, )
2.
x[!x$id %in% data.frame(train_list[i])$id, ]
1.
test_split(survey_points)
尝试使用函数anti_join时出现相同的错误:
test_split <- function(x) {
listofdfs2 <- list()
for(i in train_list){
anti_join(x, data.frame(train_list[i]), by = "id")
listofdfs2[[i]]
}
return(listofdfs2)
}
test <- test_split(survey_points)
Error in train_list[i] : invalid subscript type 'list'
9.
data.frame(train_list[i])
8.
tbl_vars(y)
7.
check_valid_names(tbl_vars(y), warn_only = TRUE)
6.
anti_join.tbl_df(tbl_df(x), y, by = by, copy = copy, ...)
5.
anti_join(tbl_df(x), y, by = by, copy = copy, ...)
4.
as.data.frame(anti_join(tbl_df(x), y, by = by, copy = copy, ...))
3.
anti_join.data.frame(x, data.frame(train_list[i]), by = "id")
2.
anti_join(x, data.frame(train_list[i]), by = "id")
1.
test_split(survey_points)