我正在尝试使用 drake R 包来处理跨多个计划的多个文件输入,以便可以迭代地构建目标,并测试每个阶段是否正常工作。下面是一个简单的示例,说明我想要完成的任务。官方文档展示了如何在单个计划中做到这一点,但我的困难在于我想跨多个计划来实现。
我无法弄清楚应该使用什么样的 transform,才能把正确的输入名称(即 read_in_plan 中的目标)传入 munge_plan。
library(drake)
# Read dplyr's DESCRIPTION metadata once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

# Placeholder processing step: emits a status message and never touches `input`.
munge_data <- function(input) {
  message("I did something!")
}
# CSV files to load; one `data_*` target is generated per entry.
file_inputs <- c("file1.csv", "file2.csv")

# Plan that reads the raw data in.
read_in_plan <- drake_plan(
  # Target holding dplyr's package description, intended to tie the plan
  # to the installed dplyr.
  pkg = utils::packageDescription("dplyr"),
  # map() stamps out one read.csv() target for each element of file_inputs.
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs)
  )
)
read_in_plan
#> # A tibble: 3 x 2
#>   target         command
#>   <chr>          <expr>
#> 1 pkg            utils::packageDescription("dplyr")
#> 2 data_file1.csv read.csv("file1.csv")
#> 3 data_file2.csv read.csv("file2.csv")
# Munge step: the target name is hard-coded, so only data_file1.csv is handled.
munge_plan <- drake_plan(
  munged = munge_data(data_file1.csv)
)
munge_plan
#> # A tibble: 1 x 2
#>   target command
#>   <chr>  <expr>
#> 1 munged munge_data(data_file1.csv)
# but really I want to do munge data on all of the
# data_file1.csv
# data_file2.csv
# munge_data_proper = drake_plan(
# munged = target(
# # some kind of transform here
# )
# )
# Combine the read-in plan and the munge plan into a single workflow plan.
full_plan <- bind_plans(read_in_plan, munge_plan)
# make(full_plan)
由reprex package(v0.2.1)于2019-05-23创建
答案 0 :(得分:1)
所有转换都被设计为在同一次 drake_plan() 调用内完成,因此很难将下面的 data_* 目标和 munged_* 目标拆分到不同的计划中。
library(drake)
# Read dplyr's DESCRIPTION metadata once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

# Placeholder processing step: emits a status message and never touches `input`.
munge_data <- function(input) {
  message("I did something!")
}
# Input files; each one becomes its own `data` target below.
file_inputs <- c("file1.csv", "file2.csv")

# Single plan containing both the read-in targets and the munged targets.
plan <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are checked on every run, even when the command itself
    # does not rerun:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  # Reuse the targets generated by the `data` transform above:
  munged = target(
    munge_data(data),
    transform = map(data)
  )
)
drake_plan_source(plan)
#> drake_plan(
#>   pkg = target(
#>     command = dplyr_version_dep,
#>     trigger = trigger(
#>       change = utils::packageDescription("dplyr")
#>     )
#>   ),
#>   data = read.csv("file1.csv"),
#>   data_2 = read.csv("file2.csv"),
#>   munged_data = munge_data(data),
#>   munged_data_2 = munge_data(data_2)
#> )
由reprex package(v0.3.0)于2019-05-23创建
对您来说,一种变通的办法是从 drake_plan(trace = TRUE) 的输出中获取所需信息。这种做法比较脆弱,但对这个小例子是可行的。
library(drake)
# Read dplyr's DESCRIPTION metadata once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

# Placeholder processing step: emits a status message and never touches `input`.
munge_data <- function(input) {
  message("I did something!")
}
# Input files; each one becomes its own `data` target below.
file_inputs <- c("file1.csv", "file2.csv")

# First plan, built with trace = TRUE so the plan carries extra columns
# (`input`, `data`) recording how each target was generated.
plan1 <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are checked on every run, even when the command itself
    # does not rerun:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  trace = TRUE
)
plan1
#> # A tibble: 3 x 5
#> target command trigger input data
#> <chr> <expr> <expr> <chr> <chr>
#> 1 pkg dplyr_version_… trigger(change = utils::packageD… <NA> <NA>
#> 2 data read.csv("file… NA … "\"file1.… data
#> 3 data_2 read.csv("file… NA … "\"file2.… data…
plan1$input
#> [1] NA "\"file1.csv\"" "\"file2.csv\""
plan1$data
#> [1] NA "data" "data_2"
# Put together the data manually for the next transformation.
library(magrittr)
# Drop the NA coming from the `pkg` row, de-duplicate, and convert the traced
# target names into symbols that the next plan can splice in.
data <- rlang::syms(unique(na.omit(plan1$data)))
str(data)
#> List of 2
#>  $ : symbol data
#>  $ : symbol data_2
# Second plan: one munged target per symbol stored in `data`.
plan2 <- drake_plan(
  munged = target(
    munge_data(d),
    # !! is essential: it unquotes `data`, handing map() the list of symbols
    # rather than the bare name `data`.
    transform = map(d = !!data)
  )
)
plan2
#> # A tibble: 2 x 2
#>   target        command
#>   <chr>         <expr>
#> 1 munged_data   munge_data(data)
#> 2 munged_data_2 munge_data(data_2)
# Drop only the trace columns (`input`, `data`) from plan1 before binding.
# Keeping `trigger` matters: selecting just `target` and `command` would also
# discard the custom trigger declared on `pkg`, so the combined plan would no
# longer carry the dplyr change check.
# NOTE(review): the printed output below was updated by hand for the extra
# column; exact column widths/truncation may differ when rerun.
full_plan <- bind_plans(dplyr::select(plan1, target, command, trigger), plan2)
full_plan
#> # A tibble: 5 x 3
#>   target        command               trigger
#>   <chr>         <expr>                <expr>
#> 1 pkg           dplyr_version_dep     trigger(change = utils::packageDescription("dplyr"))
#> 2 data          read.csv("file1.csv") NA
#> 3 data_2        read.csv("file2.csv") NA
#> 4 munged_data   munge_data(data)      NA
#> 5 munged_data_2 munge_data(data_2)    NA
由reprex package(v0.3.0)于2019-05-23创建