我想在函数中为tidyjson包编写代码,如下所示:
> enter_object(object) %>% spread_values(
> varnames[1] = jstring(strings[1]),
> varnames[2] = jstring(strings[2]),
> varnames[3] = jstring(strings[3]) )
如果 `=` 左边是像 `foo` 这样的字面名称,而不是 `varnames[1]`,那么这段代码可以正常运行。但我希望函数更灵活,这样我就可以用程序生成大量的变量名,而不必逐个手写。这些变量名最终会成为数据框的列名。我目前失败的尝试如下:
+ enter_object(object) %>%
+ spread_values(
+ varnames[[1]] = jstring(strings[1]),
Error: unexpected '=' in:
" spread_values(
varnames[[1]] ="
> varnames[2] = jstring(strings[2]),
Error: unexpected ',' in " varnames[2] = jstring(strings[2]),"
> varnames[3] = jstring(strings[3])
Error in prep_path(...) : object 'strings' not found
> )
Error: unexpected ')' in " )"
我弄不清楚应该在 `=` 前面放置什么类型的对象,才能让 R 正确识别这个 `=`。
以下是使用MrFlick解决方案的玩具示例:
> sample_json <- '[
+ {
+ "id": 10097652,
+ "members": 2386,
+ "category": {
+ "id": 23,
+ "name": "Outdoors & Adventure",
+ "shortname": "Outdoors"
+ }
+ }
+ ]'
>
> group_category1 <- sample_json %>% as.tbl_json %>%
+ gather_array %>% #gather_keys %>%
+ spread_values(
+ group_id = jstring("id")
+ ) %>%
+ enter_object("category") %>%
+ spread_values(
+ category_id = jstring("id"),
+ category_name = jstring("name"),
+ category_short_name = jstring("shortname")
+ )
> head(group_category1)
document.id array.index group_id category_id category_name category_short_name
1 1 1 10097652 23 Outdoors & Adventure Outdoors
>
> my_spread_values <- function(x, names, values) {
+ stopifnot(length(names) == length(values))
+ do.call("spread_values", c(list(x), setNames(as.list(values), names )))
+ }
> varnames <- c("category_id", "category_name", "category_shortname")
> strings <- c("id", "name", "shortname")
>
> group_category2 <- sample_json %>% as.tbl_json %>%
+ gather_array %>%
+ spread_values(group_id = jstring("id")) %>%
+ enter_object("category") %>% my_spread_values(
+ varnames, list(jstring(strings[1]), jstring(strings[2]), jstring(strings[3]) ) )
> head(group_category2)
document.id array.index group_id category_id category_name category_shortname
1 1 1 10097652 23 Outdoors & Adventure Outdoors
>
这完全可行!(在我改正了自己犯的几个粗心错误之后!)
答案 0 :(得分:2)
通常,R 不允许您使用 R 变量动态指定参数名称。您必须使用 `do.call` 动态构建实际的调用,但这与 `%>%` 管道语法配合得不太好。我们可以为该函数创建一个更动态的替代版本:
my_spread_values <- function(x, names, values) {
  # Call spread_values() with argument names that are supplied at run time.
  #
  # x      : the object passed as the first (unnamed) argument of
  #          spread_values().
  # names  : the names to give the remaining arguments, one per value.
  # values : the values to pass under those names, in the same order.
  #
  # Returns whatever spread_values() returns. Stops with an error when
  # `names` and `values` differ in length.
  stopifnot(length(names) == length(values))

  # Turn the values into a list and label each element; the labels become
  # the argument names of the generated call.
  named_args <- as.list(values)
  names(named_args) <- names

  # Prepend x so it lands in the first positional slot.
  call_args <- c(list(x), named_args)
  do.call("spread_values", call_args)
}
然后你可以这样调用它:
enter_object(object) %>% my_spread_values(
varnames,
list(jstring(strings[1]),jstring(strings[2]),jstring(strings[3]))
)
或者,如果您始终只使用简单的 `jstring`,可以写成:
enter_object(object) %>% my_spread_values(
varnames,
lapply(strings, jstring)
)
答案 1 :(得分:0)
以下是我现在使用的、基于 @MrFlick 示例的变体:
my_spread_values <- function(x, names_list, string_type) {
  # Wrapper around spread_values() driven by a collection of names.
  #
  # x           : the object passed as the first (unnamed) argument of
  #               spread_values().
  # names_list  : the names to process; each one is handed to `string_type`
  #               and is also used as the name of the resulting argument.
  # string_type : a function applied to every element of `names_list`
  #               (for example jnumber or jstring).
  #
  # Returns whatever spread_values() returns.

  # One spread_values() argument per entry, labelled with the entry itself.
  extractors <- lapply(names_list, string_type)
  names(extractors) <- as.list(names_list)

  # Prepend x so it lands in the first positional slot.
  call_args <- c(list(x), extractors)
  do.call("spread_values", call_args)
}
请先根据此处的文档定义变量 `url`:http://www.meetup.com/meetup_api/docs/
# Fetch the API response. `url` must already be defined (see the API docs
# referenced above this snippet).
raw_contents <- httr::GET(url = url)

# Request the body as a character string. The output format is selected with
# `as`; `type` is httr's MIME-type override and only produced text by
# falling through to the default for an unrecognised MIME type.
json_raw <- httr::content(raw_contents, as = "text")

# Strip the response envelope so that only the JSON array is left:
# replace the leading `{"results":[` with `[` ...
json_contents <- sub('^\\{"results":\\[', "[", json_raw)
# ... and replace the trailing `],"meta":{...}` section with `]`.
json_contents <- sub('\\],"meta"\\:\\{.*\\}.$', "]", json_contents)

# Un-escape `\/` to `/` before handing the text to tidyjson.
my_tbl_json <- gsub('\\\\/', "/", json_contents) %>% as.tbl_json

# Fields to extract with jnumber; each becomes a column of the same name.
num_array <- c("created", "id", "lat", "lon", "members", "score")
group_numerics <- my_tbl_json %>%
  gather_array %>%
  my_spread_values(num_array, jnumber) %>%
  select(-document.id)

# NOTE(review): dividing by 1000 presumes `created` is in milliseconds since
# the Unix epoch -- confirm against the API documentation.
group_numerics$created <- as.POSIXct(
  group_numerics$created / 1000,
  origin = "1970-01-01"
)
更完整的示例见这组可运行的代码(this set of working code),它使用 tidyjson 包处理来自真实 API 数据源的数据。