我正在处理一个已经用 `prettify()` 格式化过的 AOL 数据集。数据的类型和长度如下:
> json_types(People)
document.id type
1 , 1 array
> json_lengths(People)
document.id length
1 , 1, 4
经过 `prettify()` 处理后的数据片段如下:
{
"distinct_id": "159d26d852bc2-0218a9eedf5d02-1d326f50-13c680-159d26d852c2cc",
"time": 1485294450309,
"properties": {
"$browser": "Chrome",
"$browser_version": 55,
"$city": "San Francisco",
"$country_code": "US",
"$email": "amir.movafaghi@mixpanel.com",
"$initial_referrer": "$direct",
"$initial_referring_domain": "$direct",
"$name": "Amir MOvafaghi",
"$os": "Mac OS X",
"$region": "California",
"$timezone": "America/Los_Angeles",
"$transactions": [
{
"$amount": 0.99,
"$time": "2017-01-24T13:43:30.000Z"
}
],
"Favorite Genre": "Rock",
"Lifetime Song Play Count": 1,
"Lifetime Song Purchase Count": 1,
"Plan": "Premium"
},
"last_seen": 1485294450309,
"labels": [
]
},
我构建了如下的转换管道:
# Flatten the People documents into a table with one row per transaction:
# unpack the top-level array, pull the user-level fields, descend into
# "properties" for the profile fields, then unpack the transactions array.
people_b <- People %>%
  gather_array() %>% # one row per element of the top-level array
  spread_values(
    distinct_id = jstring("distinct_id"),
    time_id = jnumber("time"),
    # NOTE(review): "last_seen" is numeric in the sample record, like "time"
    # -- confirm that jstring is intended.
    last_seen = jstring("last_seen"),
    # NOTE(review): the sample record has "labels" (an array), not "label"
    # -- confirm the intended key.
    label = jstring("label")
  ) %>%
  enter_object("properties") %>% # move into the nested properties object
  spread_values(
    browser = jstring("$browser"),
    browser_version = jnumber("$browser_version"),
    city = jstring("$city"),
    country_code = jstring("$country_code"),
    email = jstring("$email"),
    initial_referrer = jstring("$initial_referrer"),
    initial_referring_domain = jstring("$initial_referring_domain"),
    name = jstring("$name"),
    operating_system = jstring("$os"),
    region = jstring("$region"),
    timezone = jstring("$timezone"),
    favorite_genre = jstring("Favorite Genre"),
    first_login_date = jstring("First Login Date"),
    lifetime_song_play_count = jnumber("Lifetime Song Play Count"),
    lifetime_song_purchase_count = jnumber("Lifetime Song Purchase Count"),
    plan = jstring("Plan")
  ) %>%
  # NOTE(review): the sample record's key is "$transactions" (with a leading
  # "$"); this path omits the "$" -- verify against the data.
  enter_object("transactions") %>%
  gather_array() %>% # one row per transaction entry
  spread_values(
    amount = jnumber("$amount"),
    transaction_time = jstring("$time")
  ) %>%
  # Keep only the extracted columns, in a fixed order.
  select(
    distinct_id, time_id, last_seen, label,
    browser, browser_version, city, country_code, email,
    initial_referrer, initial_referring_domain, name,
    operating_system, region, timezone, favorite_genre,
    first_login_date, lifetime_song_play_count,
    lifetime_song_purchase_count, plan,
    amount, transaction_time
  )
但是结果不符合预期,得到的是一个 0 行的空数据框:
> people_b
[1] distinct_id time_id last_seen label
[5] browser browser_version city country_code
[9] email initial_referrer initial_referring_domain name
[13] operating_system region timezone favorite_genre
[17] first_login_date lifetime_song_play_count lifetime_song_purchase_count plan
[21] amount transaction_time
<0 rows> (or 0-length row.names)
来自第二个数据集的样本输出(我仍然需要整理):
> event_b
name distinct_id label time sampling_factor browser_type
1 Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
2 Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
3 Sign Up 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
4 Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
5 Song Played 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
6 Song Played 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
7 Song Purchased 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
8 Plan Downgraded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12 1 Chrome
答案 0 :(得分:0)
我认为问题出在管道中的 `enter_object("transactions")` 这一步。您的 JSON 对象中的键是 `$transactions`(带有 `$` 前缀),因此您使用的路径有误:`enter_object` 找不到该键,结果就是 0 行。将其改为 `"$transactions"` 后似乎就可以正常工作了。
...
enter_object("$transactions") %>% #stack the transactions
...
完整示例如下。请注意,我删除了管道开头的第一个 `gather_array`,因为您的示例只是单个 JSON 对象,而不是数组。
# Sample input for the pipeline below: a single user record held as one JSON
# object (not wrapped in an array). The transactions live under the
# "$transactions" key inside "properties".
json <- '{
"distinct_id": "159d26d852bc2-0218a9eedf5d02-1d326f50-13c680-159d26d852c2cc",
"time": 1485294450309,
"properties": {
"$browser": "Chrome",
"$browser_version": 55,
"$city": "San Francisco",
"$country_code": "US",
"$email": "amir.movafaghi@mixpanel.com",
"$initial_referrer": "$direct",
"$initial_referring_domain": "$direct",
"$name": "Amir MOvafaghi",
"$os": "Mac OS X",
"$region": "California",
"$timezone": "America/Los_Angeles",
"$transactions": [
{
"$amount": 0.99,
"$time": "2017-01-24T13:43:30.000Z"
}
],
"Favorite Genre": "Rock",
"Lifetime Song Play Count": 1,
"Lifetime Song Purchase Count": 1,
"Plan": "Premium"
},
"last_seen": 1485294450309,
"labels": [
]
}'
# Flatten the single sample record into one row per transaction: pull the
# user-level fields, descend into "properties" for the profile fields, then
# unpack the "$transactions" array.
people_b <- json %>%
  spread_values(
    distinct_id = jstring("distinct_id"),
    time_id = jnumber("time"),
    # NOTE(review): "last_seen" is numeric in the sample record, like "time"
    # -- confirm that jstring is intended.
    last_seen = jstring("last_seen"),
    # NOTE(review): the sample record has "labels" (an array), not "label"
    # -- confirm the intended key.
    label = jstring("label")
  ) %>%
  enter_object("properties") %>% # move into the nested properties object
  spread_values(
    browser = jstring("$browser"),
    browser_version = jnumber("$browser_version"),
    city = jstring("$city"),
    country_code = jstring("$country_code"),
    email = jstring("$email"),
    initial_referrer = jstring("$initial_referrer"),
    initial_referring_domain = jstring("$initial_referring_domain"),
    name = jstring("$name"),
    operating_system = jstring("$os"),
    region = jstring("$region"),
    timezone = jstring("$timezone"),
    favorite_genre = jstring("Favorite Genre"),
    first_login_date = jstring("First Login Date"),
    lifetime_song_play_count = jnumber("Lifetime Song Play Count"),
    lifetime_song_purchase_count = jnumber("Lifetime Song Purchase Count"),
    plan = jstring("Plan")
  ) %>%
  # The key carries a leading "$" in the data, so the path must include it.
  enter_object("$transactions") %>%
  gather_array() %>% # one row per transaction entry
  spread_values(
    amount = jnumber("$amount"),
    transaction_time = jstring("$time")
  ) %>%
  # Keep only the extracted columns, in a fixed order.
  select(
    distinct_id, time_id, last_seen, label,
    browser, browser_version, city, country_code, email,
    initial_referrer, initial_referring_domain, name,
    operating_system, region, timezone, favorite_genre,
    first_login_date, lifetime_song_play_count,
    lifetime_song_purchase_count, plan,
    amount, transaction_time
  )

# The sample record holds a single transaction, hence one row.
nrow(people_b)
## [1] 1