使用 tidyjson 处理包含 1 个数组和 2 个嵌套对象的数据集时,得到 <0 rows> (or 0-length row.names) 的空结果

时间:2017-03-08 01:52:45

标签: arrays json r tidy

我正在使用我通过prettify()传递的AOL数据集。数据的类型和长度为:

> json_types(People)

 document.id     type

1          1    array

> json_lengths(People)

  document.id length

1           1      4

数据经过prettify()时的一瞥:

{
    "distinct_id": "159d26d852bc2-0218a9eedf5d02-1d326f50-13c680-159d26d852c2cc",
    "time": 1485294450309,
    "properties": {
        "$browser": "Chrome",
        "$browser_version": 55,
        "$city": "San Francisco",
        "$country_code": "US",
        "$email": "amir.movafaghi@mixpanel.com",
        "$initial_referrer": "$direct",
        "$initial_referring_domain": "$direct",
        "$name": "Amir MOvafaghi",
        "$os": "Mac OS X",
        "$region": "California",
        "$timezone": "America/Los_Angeles",
        "$transactions": [
            {
                "$amount": 0.99,
                "$time": "2017-01-24T13:43:30.000Z"
            }
        ],
        "Favorite Genre": "Rock",
        "Lifetime Song Play Count": 1,
        "Lifetime Song Purchase Count": 1,
        "Plan": "Premium"
    },
    "last_seen": 1485294450309,
    "labels": [

    ]
},

我建立了我的转型:

# Intended to flatten People (a JSON array of user records) into one row per
# transaction, carrying the user-level and property-level fields on each row.
people_b <- People %>%
  gather_array %>% # stack the user data (one row per element of the top-level array)
  spread_values(
    distinct_id = jstring("distinct_id"),
    time_id = jnumber("time"),
    # NOTE(review): "last_seen" is a number in the sample record but is read
    # with jstring here, unlike "time" above — confirm the intended type.
    last_seen = jstring("last_seen"),
    # NOTE(review): the sample record has a "labels" array, not a "label" key —
    # confirm which key this column should come from.
    label = jstring("label")) %>% # extract user data
  enter_object("properties") %>% # stack the properties
  spread_values(
    browser = jstring("$browser"),
    browser_version = jnumber("$browser_version"),
    city = jstring("$city"),
    country_code = jstring("$country_code"),
    email = jstring("$email"),
    initial_referrer = jstring("$initial_referrer"),
    initial_referring_domain = jstring("$initial_referring_domain"),
    name = jstring("$name"),
    operating_system = jstring("$os"),
    region = jstring("$region"),
    timezone = jstring("$timezone"),
    favorite_genre = jstring("Favorite Genre"),
    # "First Login Date" does not appear in the sample record shown above.
    first_login_date = jstring("First Login Date"),
    lifetime_song_play_count = jnumber("Lifetime Song Play Count"),
    lifetime_song_purchase_count = jnumber("Lifetime Song Purchase Count"),
    plan = jstring("Plan")) %>% # extract the properties
  # NOTE(review): the sample record names this key "$transactions" (with the
  # "$" prefix, like the other "$"-prefixed keys read above); the path used
  # here is "transactions" without it — presumably why the result has 0 rows.
  enter_object("transactions") %>%   # stack the transactions
  gather_array %>%
  spread_values(
    amount = jnumber("$amount"),
    transaction_time = jstring("$time")) %>% # extract the transactions
  # Keep only the extracted columns, in a fixed order.
  select(distinct_id, time_id, last_seen, label, browser, browser_version, city, country_code, email, initial_referrer,
         initial_referring_domain, name, operating_system, region, timezone, favorite_genre,
         first_login_date,lifetime_song_play_count, lifetime_song_purchase_count, plan, amount, transaction_time)

但是我得到的是一个空结果(0 行),而不是预期的数据:

    > people_b
 [1] distinct_id                  time_id                      last_seen                    label                       
 [5] browser                      browser_version              city                         country_code                
 [9] email                        initial_referrer             initial_referring_domain     name                        
[13] operating_system             region                       timezone                     favorite_genre              
[17] first_login_date             lifetime_song_play_count     lifetime_song_purchase_count plan                        
[21] amount                       transaction_time            
<0 rows> (or 0-length row.names)

来自第二个数据集的样本输出(我仍然需要整理):

    > event_b
              name                                                  distinct_id  label         time sampling_factor browser_type
1      Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
2      Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
3          Sign Up 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
4      Page Loaded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
5      Song Played 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
6      Song Played 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
7   Song Purchased 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome
8  Plan Downgraded 159f0ddf9c437c-0b4d95a6f3b9be-123a6850-13c680-159f0ddf9c525a list() 1.485776e+12               1       Chrome

1 个答案:

答案 0 :(得分:0)

我认为问题出在管道中的 enter_object('transactions') 这一步。在您的 JSON 对象中,键名是 $transactions(带有 $ 前缀),所以您使用的路径不对。改为 '$transactions' 之后似乎就可以正常工作了。

...
enter_object("$transactions") %>%   #stack the transactions
...

完整的例子。请注意,我删除了gather_array,因为您的示例只是一个对象。

# Sample input: one user record as a JSON string (a single object, not an
# array). Note that most keys under "properties" carry a literal "$" prefix.
json <- '{
"distinct_id": "159d26d852bc2-0218a9eedf5d02-1d326f50-13c680-159d26d852c2cc",
"time": 1485294450309,
"properties": {
"$browser": "Chrome",
"$browser_version": 55,
"$city": "San Francisco",
"$country_code": "US",
"$email": "amir.movafaghi@mixpanel.com",
"$initial_referrer": "$direct",
"$initial_referring_domain": "$direct",
"$name": "Amir MOvafaghi",
"$os": "Mac OS X",
"$region": "California",
"$timezone": "America/Los_Angeles",
"$transactions": [
{
  "$amount": 0.99,
  "$time": "2017-01-24T13:43:30.000Z"
}
],
"Favorite Genre": "Rock",
"Lifetime Song Play Count": 1,
"Lifetime Song Purchase Count": 1,
"Plan": "Premium"
},
"last_seen": 1485294450309,
"labels": [

]
}'


# Flatten the single user record in `json` into one row per transaction,
# keeping the user-level and property-level fields on every row.
# There is no leading gather_array because `json` is one object, not an array.
people_b <- json %>%
  spread_values(
    distinct_id = jstring("distinct_id"),
    time_id = jnumber("time"),
    # "last_seen" is a JSON number, exactly like "time", so read it with
    # jnumber instead of pushing a double through jstring.
    last_seen = jnumber("last_seen"),
    # NOTE(review): the record has a "labels" array but no "label" key, so this
    # column is presumably always NA — confirm the intended source.
    label = jstring("label")
  ) %>% # extract user data
  enter_object("properties") %>% # stack the properties
  spread_values(
    browser = jstring("$browser"),
    browser_version = jnumber("$browser_version"),
    city = jstring("$city"),
    country_code = jstring("$country_code"),
    email = jstring("$email"),
    initial_referrer = jstring("$initial_referrer"),
    initial_referring_domain = jstring("$initial_referring_domain"),
    name = jstring("$name"),
    operating_system = jstring("$os"),
    region = jstring("$region"),
    timezone = jstring("$timezone"),
    favorite_genre = jstring("Favorite Genre"),
    # "First Login Date" is absent from the sample record.
    first_login_date = jstring("First Login Date"),
    lifetime_song_play_count = jnumber("Lifetime Song Play Count"),
    lifetime_song_purchase_count = jnumber("Lifetime Song Purchase Count"),
    plan = jstring("Plan")
  ) %>% # extract the properties
  # The key is literally "$transactions" (with the "$"); the path
  # "transactions" is what produced the 0-row result in the question.
  enter_object("$transactions") %>% # stack the transactions
  gather_array %>% # one row per element of the transactions array
  spread_values(
    amount = jnumber("$amount"),
    transaction_time = jstring("$time")
  ) %>% # extract the transactions
  # Keep only the extracted columns, in a fixed order.
  select(
    distinct_id, time_id, last_seen, label,
    browser, browser_version, city, country_code, email,
    initial_referrer, initial_referring_domain, name,
    operating_system, region, timezone,
    favorite_genre, first_login_date,
    lifetime_song_play_count, lifetime_song_purchase_count, plan,
    amount, transaction_time
  )

nrow(people_b)
## [1] 1