如何在 drake 计划中创建一个目标,使其对先前 map 目标的结果与一个新变量做交叉(cross)?

时间:2019-07-15 11:07:08

标签: r ropensci drake-r-package

在用 map 创建的多个目标(a)之后,我还有另外两个目标(b 和 d),它们对第一个目标做 map。现在,我想在另一个目标中使用这些目标的结果,并且还想与另一个变量(model)做交叉(cross)。

我在下面贴了一个 reprex。在我的实际场景中,a 表示数据集的不同子集,b 和 d 预先计算一些内容,e 则利用预先计算好的数据对每个子集应用不同的模型。

我尝试了 map 和 cross 的不同组合(例如下面的 e),但没有成功。我也试过把 fn4 中要用到的所有目标名称都加进去,但这会产生不必要的交叉。

# Question reprex: a drake plan in which `e` is meant to pair the per-`arg1`
# targets `b` and `d` with each value of `model`.
library(drake)
drake_plan(
  # `a`: map() over the `arg1` / `arg2` values; the printed plan below shows
  # two `a` targets.
  a = target(
    fn1(arg1, arg2),
    transform = map(
      arg1 = !!c("arg11", "arg12"),
      arg2 = !!c("arg21", "arg22")
    )
  ),
  # `b`: map() over the `arg1` values introduced by `a`.
  b = target(
    fn2(arg1),
    transform = map(arg1)
  ),
  # `d`: same mapping over `arg1` as `b`, with a different command.
  d = target(
    fn3(arg1),
    transform = map(arg1)
  ),
  # `e`: cross() of `b`, `d` and three `model` values, with `.by = arg1`.
  # NOTE(review): in the printed plan below, `.by` shows up as an ordinary
  # column and `arg1` is NA for every `e` target (names `e_NA_*`), so `arg1`
  # is not carried into `e` -- this is the problem the question asks about.
  e = target(
    fn4(b, d, model, arg1),
    transform = cross(
      b,
      d,
      model = !!c("x", "y", "z"),
      .by = arg1,
      .id = c(arg1, model)
    )
  ),
  # Presumably what adds the extra grouping columns (arg1, arg2, a, b, ...)
  # to the printed plan -- confirm against the drake documentation.
  trace = TRUE
)
#> # A tibble: 18 x 10
#>    target   command     arg1    arg2   a      b     d     model .by   e    
#>    <chr>    <expr>      <chr>   <chr>  <chr>  <chr> <chr> <chr> <chr> <chr>
#>  1 a_arg11… fn1("arg11… "\"arg… "\"ar… a_arg… <NA>  <NA>  <NA>  <NA>  <NA> 
#>  2 a_arg12… fn1("arg12… "\"arg… "\"ar… a_arg… <NA>  <NA>  <NA>  <NA>  <NA> 
#>  3 b_arg11  fn2("arg11… "\"arg… "\"ar… a_arg… b_ar… <NA>  <NA>  <NA>  <NA> 
#>  4 b_arg12  fn2("arg12… "\"arg… "\"ar… a_arg… b_ar… <NA>  <NA>  <NA>  <NA> 
#>  5 d_arg11  fn3("arg11… "\"arg… "\"ar… a_arg… <NA>  d_ar… <NA>  <NA>  <NA> 
#>  6 d_arg12  fn3("arg12… "\"arg… "\"ar… a_arg… <NA>  d_ar… <NA>  <NA>  <NA> 
#>  7 e_NA_x   fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"x… arg1  e_NA…
#>  8 e_NA_y   fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"y… arg1  e_NA…
#>  9 e_NA_z   fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"z… arg1  e_NA…
#> 10 e_NA_x_2 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"x… arg1  e_NA…
#> 11 e_NA_y_2 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"y… arg1  e_NA…
#> 12 e_NA_z_2 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"z… arg1  e_NA…
#> 13 e_NA_x_3 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"x… arg1  e_NA…
#> 14 e_NA_y_3 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"y… arg1  e_NA…
#> 15 e_NA_z_3 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"z… arg1  e_NA…
#> 16 e_NA_x_4 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"x… arg1  e_NA…
#> 17 e_NA_y_4 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"y… arg1  e_NA…
#> 18 e_NA_z_4 fn4(b_arg1… <NA>    <NA>   <NA>   b_ar… d_ar… "\"z… arg1  e_NA…

由 reprex 包(v0.3.0)于 2019-07-15 创建

这似乎可行,但是 arg1 和 arg2 没有被传递下来,因此无法在 fn4 及后续目标中使用。我是否应该把这一步拆成两步?(先 map 再 cross,还是先 cross 再 map?)我也试过更早地在 a 之后就做 cross,但我不希望把相同的 b 和 d 重复计算多次,因为那可能耗费大量时间和内存。

编辑:一个更现实的例子

许多目标都要用到同一份数据,而 run 函数(它调用外部二进制程序)要求这些数据先保存为文件。为了避免多次重复计算同样的内容,也避免把同样的内容多次保存到不同的文件中(这些文件可能很大),我在 drake 中把这些任务拆分成了单独的目标。

library(drake)
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

path_data <- c("path/data_1.csv", "path/data_2.csv")
countries <- c("1", "2")
analysis_dir <- "path"

substudies_1 <- tribble(
  ~substudy, ~adjust, ~sex,
  "sub1", "no", "male/female",
  "sub2", "yes", "male/female"
)
models <- c("x", "y")

plan <- drake_plan(
  data = target(
    get_data(file_in(path)),
    transform = map(path = !!path_data, country = !!countries, .id = country)
  ),
  SNP = target(
    get_SNP_data_country(SNP_gene, data),
    transform = map(data, .id = country)
  ),
  map = target(
    # actually write file and save path
    write_snp_map(SNP, file.path(analysis_dir, country, "SNP_map.txt")),
    transform = map(SNP, .id = country)
  ),
  ref = target(
    # actually write file and save path
    write_snp_ref(SNP, file.path(analysis_dir, country, "SNP_ref.txt")),
    transform = map(SNP, .id = country)
  ),
  # data_2 is managed in another target because it has a different set of substudies,
  # this maybe can be tidied up, a problem for another day...
  population_1 = target(
    extract_population(data, sex, adjust),
    transform = map(
      data = data_1,
      country = "1",
      .data = !!substudies_1,
      .id = c(substudy)
    ),
  ),
  pedigree_1 = target(
    extract_pedigree(data_1, population_1),
    transform = map(
      population_1,
      .id = substudy
    )
  ),
  covariable_1 = target(
    extract_covariable(data_1, population_1, adjust, sex),
    transform = map(
      population_1,
      .id = substudy
    )
  ),
  # run_1 = target(
  #   run_fn(map_1, ref_1, pedigree_1, covariable_1, substudy, model, adjust, sex),
  #   transform = cross(population_1, model = !!models)
  # ),
  trace = TRUE
)

# the desired plan for the run target
run_plan <- tibble(
  target = c("run_1_x_population_1_sub1", "run_1_y_population_1_sub1", "run_1_x_population_1_sub2", "run_1_y_population_1_sub2"),
  command = list(
    expr(run(map_1, ref_1, pedigree_1_sub1, covariable_1_sub1, "x", "sub1", "no")),
    expr(run(map_1, ref_1, pedigree_1_sub1, covariable_1_sub1, "y", "sub1", "no")),
    expr(run(map_1, ref_1, pedigree_1_sub2, covariable_1_sub2, "x", "sub2", "yes")),
    expr(run(map_1, ref_1, pedigree_1_sub2, covariable_1_sub2, "y", "sub2", "yes"))
  ),
  path = NA_character_,
  country = "1",
  population_1 = c(rep("population_1_sub1", 2), rep("population_1_sub2", 2)),
  substudy = c(rep("sub1", 2), rep("sub2", 2)),
  adjust = c(rep("no", 2), rep("yes", 2)),
  sex = c(rep("male/female", 4)),
  pedigree_1 = c(rep("pedigree_1_sub1", 2), rep("pedigree_1_sub2", 2)),
  covariable_1 = c(rep("covariable_1_sub1", 2), rep("covariable_1_sub2", 2)),
  model = c("x", "y", "x", "y"),
  SNP = "SNP_1",
  map = "map_1",
  ref = "ref_1"
)

config <- drake_config(bind_rows(plan, run_plan))
vis_drake_graph(config, targets_only = TRUE)

reprex package(v0.3.0)于2019-07-15创建

计划:i.imgur.com/MyqoKJi.png

编辑2:

我现在在 map 转换中使用 .data 参数,它接收一个包含先前目标名称的数据框(名称通过 rlang::syms 转换为符号),只是这种做法无法与 drake::drake_plan 的 max_expand 参数配合使用。这个方案也不是最优的,因为为 .data 构造网格非常冗长。

1 个答案:

答案 0 :(得分:0)

您能否不使用任何转换(transform),直接明确写出您想要得到的计划?drake_plan_source() 可以帮上忙。

补充一点:只有 combine() 能识别 .by。另一种方法或许是使用 transform = map(.data = !!your_grid_of_combinations),参见 https://ropenscilabs.github.io/drake-manual/plans.html#map

您想要的计划看起来像这样吗?

# Plan proposed in the answer. Compared with the question's plan, `e` lists
# `arg1` as a grouping variable of cross() instead of passing `.by = arg1`,
# `model` is written as a literal `c(...)` without `!!`, and `trace = TRUE`
# is not set.
library(drake)
plan <- drake_plan(
  # `a`: map() over the `arg1` / `arg2` values.
  a = target(
    fn1(arg1, arg2),
    transform = map(
      arg1 = !!c("arg11", "arg12"),
      arg2 = !!c("arg21", "arg22")
    )
  ),
  # `b`: map() over the `arg1` values introduced by `a`.
  b = target(
    fn2(arg1),
    transform = map(arg1)
  ),
  # `d`: same mapping over `arg1` as `b`, with a different command.
  d = target(
    fn3(arg1),
    transform = map(arg1)
  ),
  # `e`: cross() over `b`, `d`, `model` and `arg1`; target names are built
  # from `arg1` and `model` via `.id`.
  # NOTE(review): presumably this keeps each `e` tied to the `b`/`d` pair that
  # shares its `arg1` -- confirm by printing the plan.
  e = target(
    fn4(b, d, model, arg1),
    transform = cross(
      b,
      d,
      model = c("x", "y", "z"),
      arg1,
      .id = c(arg1, model)
    )
  )
)

# Build a drake config from the plan and draw its dependency graph.
config <- drake_config(plan)
vis_drake_graph(config)

由 reprex 包(v0.3.0)于 2019-07-15 创建