您的数据存储在大量 SQLite 数据库文件中,您希望把所有这些数据库文件中同一张表的数据汇总到一起。
这可以使用 dplyr 或 tidyverse 实现吗?
示例数据:
# Required Libraries
require('tidyverse')
require('RSQLite')
require('pool')
require('here')
# Create the dummy data: a 2 x 4 data frame of random integers drawn from 0..10
test <- data.frame(t(replicate(2, sample(0:10, 4, replace = TRUE))))
fn <- here::here("1testing.sqlite3")
con <- dbPool(drv = RSQLite::SQLite(), dbname = fn)
# overwrite = TRUE keeps the script re-runnable: without it, a second run
# fails because the TEST table already exists in the database file.
write_result <- dbWriteTable(con, "TEST", test, overwrite = TRUE)
poolClose(con)
rm(con)
# Create multiple SQLite databases by copying the first one.
# overwrite = TRUE refreshes the copies on re-runs; by default file.copy()
# silently skips existing targets, leaving them with stale data.
file.copy(from = fn, to = here::here("2testing.sqlite3"), overwrite = TRUE)
file.copy(from = fn, to = here::here("3testing.sqlite3"), overwrite = TRUE)
注意:被采纳的答案建议创建一个用户自定义函数(UDF)。在该函数内部,您可以合并并处理来自多个表的数据,然后返回最终结果。
答案 0 :(得分:0)
这是可能的。关键在于在 mutate 中使用 map 来调用您的自定义函数。示例:
# Required libraries
require('tidyverse')
require('RSQLite')
require('pool')
require('here')
# User defined function to read+process the data.
#
# Opens the SQLite database file `fn` through a connection pool, reads the
# whole TEST table into memory and returns it as a data.frame.
#
# Arguments:
#   fn  Path of the SQLite database file to read.
#
# Returns:
#   A data.frame with the contents of the TEST table. Files that raise errors
#   or warnings yield a one-row, one-column data.frame holding NA instead, so
#   that one bad file does not abort the whole collation.
gather_data <- function(fn) {
  message("Processing ", fn)

  # Start from NULL so the cleanup below can tell whether the pool was ever
  # created; dbPool() itself may be the call that fails.
  db <- NULL
  on.exit(
    # `&&` short-circuits: db$valid is only inspected when a pool exists.
    if (!is.null(db) && isTRUE(db$valid)) poolClose(db),
    add = TRUE
  )

  # Shared handler: report the condition and fall back to the NA placeholder.
  na_result <- function(cond) {
    message(conditionMessage(cond))
    as.data.frame(NA)
  }

  dat <- tryCatch(
    {
      db <- dbPool(drv = RSQLite::SQLite(), dbname = fn)
      db %>% tbl("TEST") %>% collect()
    },
    error = na_result,
    warning = na_result
  )

  as.data.frame(dat)
}
# Collate the data: one row per database file, then expand each file's TEST
# table so every table row is tagged with the file it came from.
results <- data.frame(
  # The pattern is a regular expression (not a glob): escape the dot and
  # anchor at the end of the file name.
  FILE = list.files(
    path = here::here(),
    pattern = "testing\\.sqlite3$",
    full.names = FALSE
  ),
  stringsAsFactors = FALSE
) %>%
  # FILE keeps the bare file name for display; the reader gets the full path
  # so the lookup does not depend on the current working directory.
  mutate(res = map(here::here(FILE), gather_data)) %>%
  unnest(res)
输出:
> results
FILE X1 X2 X3 X4
1 1testing.sqlite3 1 3 0 9
2 1testing.sqlite3 2 0 5 1
3 2testing.sqlite3 1 3 0 9
4 2testing.sqlite3 2 0 5 1
5 3testing.sqlite3 1 3 0 9
6 3testing.sqlite3 2 0 5 1