drake (R):是否可以通过函数生成带触发器的目标

时间:2019-05-03 04:29:08

标签: r drake-r-package

我目前正在为一个项目评估 drake,想知道是否有办法以编程方式生成带触发器的目标(或计划的一部分)。

下面是我理想中的用例/示例代码。但是 drake 把我的函数调用本身当作命令,而不会读取其中的触发器,因此我无法让它按预期工作。

#' Wrap a remote fetch in a drake target whose trigger follows a remote hash
#'
#' NOTE(review): the surrounding question reports that when this wrapper is
#' called inside drake_plan(), the wrapper call itself is recorded as the
#' command and the trigger defined here does not appear in the plan.
#'
#' @param remote_path Path of the remote resource; passed to both
#'   retrive_remote_hash() and fetch_remote_data() (helpers defined elsewhere,
#'   names kept exactly as spelled in the original).
#' @param ... Accepted but not used by this function.
#' @return Whatever the target() call returns.
drake_fetch_remote_data <- function(remote_path, ...) {
  # Look the hash up first so it can serve as the trigger's `change` value.
  hash <- retrive_remote_hash(remote_path)
  target(
    command = fetch_remote_data(remote_path),
    trigger = trigger(
      change = hash
    )
  )
}

# Desired usage: one wrapper call per remote path.
# (No trailing comma after the last target, so no empty argument is passed.)
plan <- drake_plan(
  df1 = drake_fetch_remote_data("path1"),
  df2 = drake_fetch_remote_data("path2")
)
#> # A tibble: 2 x 2
#>   target   command                         
#>   <chr>    <expr>                          
#> 1 df1 drake_fetch_remote_data("path1")
#> 2 df2 drake_fetch_remote_data("path2")

以下代码生成的计划则包含触发器。

#' Wrap a remote fetch in a drake target whose trigger follows a remote hash
#'
#' @param remote_path Path of the remote resource; passed to both
#'   retrive_remote_hash() and fetch_remote_data() (helpers defined elsewhere,
#'   names kept exactly as spelled in the original).
#' @param ... Accepted but not used by this function.
#' @return Whatever the target() call returns.
drake_fetch_remote_data <- function(remote_path, ...) {
  # Look the hash up first so it can serve as the trigger's `change` value.
  hash <- retrive_remote_hash(remote_path)
  target(
    command = fetch_remote_data(remote_path),
    trigger = trigger(
      change = hash
    )
  )
}

# Writing target() directly inside drake_plan() yields a plan with a
# `trigger` column alongside `command`.
plan <- drake_plan(
  df1 = target(
    command = fetch_remote_data("path1"),
    trigger = trigger(change = lookup_hash("path1"))
  )
)
# A tibble: 1 x 3
#   target command                    trigger                               
#   <chr>  <expr>                     <expr>                                
# 1 df1    fetch_remote_data("path1") trigger(change = lookup_hash("path1"))

1 个答案:

答案 0 :(得分:1)

当然可以!有几种方法,请参阅 https://ropenscilabs.github.io/drake-manual/plans.html#large-plans

library(drake)

# Write out paths one by one.
# map() expands the single `df` target into one target per value of `path`;
# the generated targets are named df_<path> (df_path1, df_path2).
drake_plan(
  df = target(
    drake_fetch_remote_data(path),
    transform = map(path = c("path1", "path2"))
  )
)
#> # A tibble: 2 x 2
#>   target   command                         
#>   <chr>    <expr>                          
#> 1 df_path1 drake_fetch_remote_data("path1")
#> 2 df_path2 drake_fetch_remote_data("path2")

# Or generate a large collection of paths.
# `!!` makes paste0() run while the plan is being built, so map() receives
# the ten resulting strings ("path1" ... "path10") rather than the call.
drake_plan(
  df = target(
    drake_fetch_remote_data(path),
    transform = map(path = !!paste0("path", seq_len(10)))
  )
)
#> # A tibble: 10 x 2
#>    target    command                          
#>    <chr>     <expr>                           
#>  1 df_path1  drake_fetch_remote_data("path1") 
#>  2 df_path2  drake_fetch_remote_data("path2") 
#>  3 df_path3  drake_fetch_remote_data("path3") 
#>  4 df_path4  drake_fetch_remote_data("path4") 
#>  5 df_path5  drake_fetch_remote_data("path5") 
#>  6 df_path6  drake_fetch_remote_data("path6") 
#>  7 df_path7  drake_fetch_remote_data("path7") 
#>  8 df_path8  drake_fetch_remote_data("path8") 
#>  9 df_path9  drake_fetch_remote_data("path9") 
#> 10 df_path10 drake_fetch_remote_data("path10")

# You can reference a variable that stores the paths.
# `!!paths` inserts the current value of `paths` into the transform, giving
# the same ten targets as generating the paths inline.
paths <- paste0("path", seq_len(10))
drake_plan(
  df = target(
    drake_fetch_remote_data(path),
    transform = map(path = !!paths)
  )
)
#> # A tibble: 10 x 2
#>    target    command                          
#>    <chr>     <expr>                           
#>  1 df_path1  drake_fetch_remote_data("path1") 
#>  2 df_path2  drake_fetch_remote_data("path2") 
#>  3 df_path3  drake_fetch_remote_data("path3") 
#>  4 df_path4  drake_fetch_remote_data("path4") 
#>  5 df_path5  drake_fetch_remote_data("path5") 
#>  6 df_path6  drake_fetch_remote_data("path6") 
#>  7 df_path7  drake_fetch_remote_data("path7") 
#>  8 df_path8  drake_fetch_remote_data("path8") 
#>  9 df_path9  drake_fetch_remote_data("path9") 
#> 10 df_path10 drake_fetch_remote_data("path10")

# Shorter target names.
# `.id = id` builds the target-name suffix from `id` instead of `path`,
# producing df_1 ... df_10 rather than df_path1 ... df_path10.
ids <- as.numeric(seq_len(10))
paths <- paste0("path", seq_len(10))
drake_plan(
  df = target(
    drake_fetch_remote_data(path),
    transform = map(path = !!paths, id = !!ids, .id = id)
  )
)
#> # A tibble: 10 x 2
#>    target command                          
#>    <chr>  <expr>                           
#>  1 df_1   drake_fetch_remote_data("path1") 
#>  2 df_2   drake_fetch_remote_data("path2") 
#>  3 df_3   drake_fetch_remote_data("path3") 
#>  4 df_4   drake_fetch_remote_data("path4") 
#>  5 df_5   drake_fetch_remote_data("path5") 
#>  6 df_6   drake_fetch_remote_data("path6") 
#>  7 df_7   drake_fetch_remote_data("path7") 
#>  8 df_8   drake_fetch_remote_data("path8") 
#>  9 df_9   drake_fetch_remote_data("path9") 
#> 10 df_10  drake_fetch_remote_data("path10")

由 reprex 包(v0.2.1)于 2019-05-03 创建

编辑2019-05-05

触发器可以像命令一样使用分组变量。

library(drake)
# The grouping variable `path` is substituted into both the command and the
# trigger, so every generated target gets its own trigger(change = ...) call.
ids <- as.numeric(seq_len(10))
paths <- paste0("path", seq_len(10))
drake_plan(
  df = target(
    fetch_data(path),
    trigger = trigger(change = hash_data(path)),
    transform = map(path = !!paths, id = !!ids, .id = id)
  )
)
#> # A tibble: 10 x 3
#>    target command              trigger                              
#>    <chr>  <expr>               <expr>                               
#>  1 df_1   fetch_data("path1")  trigger(change = hash_data("path1")) 
#>  2 df_2   fetch_data("path2")  trigger(change = hash_data("path2")) 
#>  3 df_3   fetch_data("path3")  trigger(change = hash_data("path3")) 
#>  4 df_4   fetch_data("path4")  trigger(change = hash_data("path4")) 
#>  5 df_5   fetch_data("path5")  trigger(change = hash_data("path5")) 
#>  6 df_6   fetch_data("path6")  trigger(change = hash_data("path6")) 
#>  7 df_7   fetch_data("path7")  trigger(change = hash_data("path7")) 
#>  8 df_8   fetch_data("path8")  trigger(change = hash_data("path8")) 
#>  9 df_9   fetch_data("path9")  trigger(change = hash_data("path9")) 
#> 10 df_10  fetch_data("path10") trigger(change = hash_data("path10"))

由 reprex 包(v0.2.1)于 2019-05-06 创建

为了更加清晰,您还可以将命令作为命名参数 `command` 传给 target():

library(drake)
# Same plan as the unnamed-command version: passing the command through the
# `command` argument changes only readability, not the resulting plan.
ids <- as.numeric(seq_len(10))
paths <- paste0("path", seq_len(10))
drake_plan(
  df = target(
    command = fetch_data(path), # now named for clarity
    trigger = trigger(change = hash_data(path)),
    transform = map(path = !!paths, id = !!ids, .id = id)
  )
)
#> # A tibble: 10 x 3
#>    target command              trigger                              
#>    <chr>  <expr>               <expr>                               
#>  1 df_1   fetch_data("path1")  trigger(change = hash_data("path1")) 
#>  2 df_2   fetch_data("path2")  trigger(change = hash_data("path2")) 
#>  3 df_3   fetch_data("path3")  trigger(change = hash_data("path3")) 
#>  4 df_4   fetch_data("path4")  trigger(change = hash_data("path4")) 
#>  5 df_5   fetch_data("path5")  trigger(change = hash_data("path5")) 
#>  6 df_6   fetch_data("path6")  trigger(change = hash_data("path6")) 
#>  7 df_7   fetch_data("path7")  trigger(change = hash_data("path7")) 
#>  8 df_8   fetch_data("path8")  trigger(change = hash_data("path8")) 
#>  9 df_9   fetch_data("path9")  trigger(change = hash_data("path9")) 
#> 10 df_10  fetch_data("path10") trigger(change = hash_data("path10"))

由 reprex 包(v0.2.1)于 2019-05-06 创建