Question

我是 R 的新手，现在需要解析数据集中的一个 json 列。我已经看过几乎所有关于解析 json 的帖子，但仍然找不到合适的解决方案……因为我觉得我的问题有点不同：

以下是我的情况：

我正在使用 R 通过 ODBC 连接到 SQL 数据库，并找到了我需要的表：

TCbigdata是目标json列而json看起来像：

{
"memberid": "30325292",
"hotelgroup": {
    "g_orders": "-1",
    "g_sfristcreatedate": "-1",
    "g_lastcreatedate": "-1",
    "g_slastcreatedate": "-1",
    "g_fristcreatedate": "-1"
},
"visa": {
    "v_orders": "-1",
    "v_maxcountryid": "-1",
    "v_lastsorderdate": "-1",
    "v_maxvisaperson": "-1",
    "v_lastorderdate": "-1",
    "v_lastvisacountryid": "-1",
    "v_sorders": "-1"
},
"callcentertel": {
    "lastcctzzycalldate": "-1",
    "ishavecctcomplaintcall": "-1",
    "lastcctchujingcalldate": "-1",
    "lastcctyouluncalldate": "-1"
}....(key n, key n+1.. etc)..}

**我期望的输出是所有嵌套的变量；如果可能的话，我想去掉 memberid、hotelgroup、visa、callcentertel 等这样的分组键，所以：

1. 把 "g_orders ... v_orders ... lastcct ..." 等列解析到同一个数据集中，不保留 "hotelgroup"、"visa"、"callcentertel" 等分组键；

2. 或者，将其解析为多个数据集："hotelgroup" 表，列为 "g_orders" + "g_sfristcreatedate" ……；"visa" 表，列为 "v_orders" + "v_maxcountryid" ……

我不确定是否有专门处理这类问题的包？

============ 问题描述 && 期望输出 =================

我使用jsonlite / rjsonio / tidyjson搜索了几个演示，但未找到合适的路径。

**另一个让我困惑的地方是：我的数据集来自数据仓库，通过 ODBC 读取后，"TCbigdata" 列的类型是 "factor"，而不是我原以为的 "character"：

就像在DW中一样：

================ 我的代码 …… TBC ========================

这是我的代码：

# SQL TABLE
orgtc <- sqlQuery(
  channel1,
  'SELECT idMemberInfo,memberid, refbizid, crttime, TCbigdata  FROM tcbiz_fq_rcs_data.MemberInfo '
)

# Convert var_type
# NOTE(review): the query above stores its result in `orgtc` and selects
# `TCbigdata`, while the lines below read `orgjf$JFMemberPortrait` -- presumably
# pasted from a sibling script; confirm which object/column is intended.
orgjf$JFMemberPortrait <- as.character(orgjf$JFMemberPortrait)

# ????? ---- TBD
library(jsonlite)
l <- fromJSON(orgjf$JFMemberPortrait, simplifyDataFrame = FALSE)

感谢您的帮助！

Answer 1

有趣的问题。实际上有两件事：

从DW中获取JSON
将JSON解析为您想要的输出

看起来你已经取得了不错的进展，已经能从 DW 中取出 JSON。我不确定您使用的是哪种连接方式，但我建议使用较新的 odbc 包，它有一个很好的 DBI 接口。

（请记住，reproducible examples对于快速获得帮助非常重要）。从DW获得数据后，您应该拥有我在下面制作的data_frame。

此外，如果你想使用 tidyjson（我的偏好），需要知道它目前不在 CRAN 上；jeremystan/tidyjson 的开发版本有一些有用的功能（但与新版 dplyr 不兼容）。这里我使用我自己仓库（colearendt/tidyjson）中的开发版：

suppressPackageStartupMessages(library(tidyverse))                                                                    
# devtools::install_github("colearendt/tidyjson")                                                                     
suppressPackageStartupMessages(library(tidyjson))                                                                     
raw_json <- '{                                                                                                        
"memberid": "30325292",                                                                                               
"hotelgroup": {                                                                                                       
"g_orders": "-1",                                                                                                     
"g_sfristcreatedate": "-1",                                                                                           
"g_lastcreatedate": "-1",                                                                                             
"g_slastcreatedate": "-1",                                                                                            
"g_fristcreatedate": "-1"                                                                                             
},                                                                                                                    
"visa": {                                                                                                             
"v_orders": "-1",                                                                                                     
"v_maxcountryid": "-1",                                                                                               
"v_lastsorderdate": "-1",                                                                                             
"v_maxvisaperson": "-1",                                                                                              
"v_lastorderdate": "-1",                                                                                              
"v_lastvisacountryid": "-1",                                                                                          
"v_sorders": "-1"                                                                                                     
},                                                                                                                    
"callcentertel": {                                                                                                    
"lastcctzzycalldate": "-1",                                                                                           
"ishavecctcomplaintcall": "-1",                                                                                       
"lastcctchujingcalldate": "-1",                                                                                       
"lastcctyouluncalldate": "-1"                                                                                         
}                                                                                                                     
}'                                                                                                                    

# Stand-in for the table pulled from the DW: ten rows, each carrying the same
# JSON document (raw_json) as a character string in the TCbigdata column.
# tibble() replaces the deprecated data_frame(); rep() replaces the
# lapply()/as.character() round trip and yields the same character vector.
dw_data <- tibble(
  idMemberInfo = 1:10,
  TCbigdata = rep(raw_json, 10)
)

dw_data
#> # A tibble: 10 x 2
#>    idMemberInfo TCbigdata                                                 
#>           <int> <chr>                                                     
#>  1            1 "{                                                       …
#>  2            2 "{                                                       …
#>  3            3 "{                                                       …
#>  4            4 "{                                                       …
#>  5            5 "{                                                       …
#>  6            6 "{                                                       …
#>  7            7 "{                                                       …
#>  8            8 "{                                                       …
#>  9            9 "{                                                       …
#> 10           10 "{                                                       …

# Wrap the data frame as a tbl_json, reading each row's document from TCbigdata
dw_json <- dw_data %>% as.tbl_json(json.column = "TCbigdata")

# option 1 - have tidyjson spread every key into its own column
# - nested keys come back prefixed with their group key, so a rename is needed
opt_1 <- spread_all(dw_json)
names(opt_1)
#>  [1] "idMemberInfo"                        
#>  [2] "memberid"                            
#>  [3] "hotelgroup.g_orders"                 
#>  [4] "hotelgroup.g_sfristcreatedate"       
#>  [5] "hotelgroup.g_lastcreatedate"         
#>  [6] "hotelgroup.g_slastcreatedate"        
#>  [7] "hotelgroup.g_fristcreatedate"        
#>  [8] "visa.v_orders"                       
#>  [9] "visa.v_maxcountryid"                 
#> [10] "visa.v_lastsorderdate"               
#> [11] "visa.v_maxvisaperson"                
#> [12] "visa.v_lastorderdate"                
#> [13] "visa.v_lastvisacountryid"            
#> [14] "visa.v_sorders"                      
#> [15] "callcentertel.lastcctzzycalldate"    
#> [16] "callcentertel.ishavecctcomplaintcall"
#> [17] "callcentertel.lastcctchujingcalldate"
#> [18] "callcentertel.lastcctyouluncalldate"

# for instance... as long as there are no conflicts
#
# Drop the group prefix (everything up to and including the first ".") from
# each name; names without a "." are returned unchanged.
#
# x: character vector of column names.
# Returns a character vector of the same length as x.
rename_function <- function(x) {
  # One vectorised substitution replaces three identical str_detect() passes,
  # and lets NA entries pass through as NA instead of failing inside a
  # subscripted assignment.
  sub("^[^.]*\\.", "", x)
}
# Apply rename_function to every column name of opt_1 and show the result
names(rename_all(opt_1, .funs = list(rename_function)))
#>  [1] "idMemberInfo"           "memberid"              
#>  [3] "g_orders"               "g_sfristcreatedate"    
#>  [5] "g_lastcreatedate"       "g_slastcreatedate"     
#>  [7] "g_fristcreatedate"      "v_orders"              
#>  [9] "v_maxcountryid"         "v_lastsorderdate"      
#> [11] "v_maxvisaperson"        "v_lastorderdate"       
#> [13] "v_lastvisacountryid"    "v_sorders"             
#> [15] "lastcctzzycalldate"     "ishavecctcomplaintcall"
#> [17] "lastcctchujingcalldate" "lastcctyouluncalldate"

# option 2 - spell out exactly which values to extract
# - more typing up front
opt_2 <- spread_values(
  dw_json,
  g_orders = jstring(hotelgroup, g_orders),
  g_sfristcreatedate = jstring(hotelgroup, g_sfristcreatedate),
  # ...
  lastcctzzycalldate = jstring(callcentertel, lastcctzzycalldate)
  # ...
)
names(opt_2)
#> [1] "idMemberInfo"       "g_orders"           "g_sfristcreatedate"
#> [4] "lastcctzzycalldate"

希望这对你有帮助！顺便说一句（FWIW），我希望 tidyjson 这类行为能在 R 社区中延续下去。

如何使用R在列中使用多个键解析json

1 个答案: