将JSON转换/解析为R中的数据框

时间:2014-12-04 17:30:46

标签: json r dataframe jsonlite

我有以下JSON

  

file.txt 的内容:

     

[{"metric":"create","tags":{"host":"sdsn13","cluster":"cdw","type":"SG"},"aggregateTags":[],"dps":{"1417621083":71.72777777777777,"1417621204":70.76859504132231,"1417621384":70.92222222222222,"1417621564":70.84444444444445,"1417621623":71.32203389830508,"1417621803":70.92777777777778,"1417621925":70.60655737704919,"1417621983":71.17241379310344,"1417622043":70.96666666666667,"1417622223":90.05555555555556,"1417622403":96.81666666666666,"1417622464":95.36065573770492,"1417622644":96.65,"1417622824":80.13333333333334,"1417623003":67.6536312849162,"1417623363":67.375,"1417623424":66.39344262295081,"1417623543":67.6890756302521,"1417623724":67.29834254143647,"1417623784":66.78333333333333,"1417623964":68.99444444444444,"1417624144":71.47777777777777,"1417624323":71.71508379888269,"1417624503":71.7611111111111,"1417624563":66.66666666666667}}]

我使用了jsonlite并且能够读取JSON并将其转换为数据框

# Parse file.txt with jsonlite and simplify the JSON records into a data frame.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
k <- jsonlite::fromJSON("file.txt", simplifyDataFrame = TRUE)
> head(k)
                            metric          tags.host tags.cluster     tags.type dps.1417621083 dps.1417621204 dps.1417621384 dps.1417621564
1 create sdsn13     cdw SG       71.72778        70.7686       70.92222       70.84444
  dps.1417621623 dps.1417621803 dps.1417621925 dps.1417621983 dps.1417622043 dps.1417622223 dps.1417622403 dps.1417622464 dps.1417622644
1       71.32203       70.92778       70.60656       71.17241       70.96667       90.05556       96.81667       95.36066          96.65
  dps.1417622824 dps.1417623003 dps.1417623363 dps.1417623424 dps.1417623543 dps.1417623724 dps.1417623784 dps.1417623964 dps.1417624144
1       80.13333       67.65363         67.375       66.39344       67.68908       67.29834       66.78333       68.99444       71.47778
  dps.1417624323 dps.1417624503 dps.1417624563
1       71.71508       71.76111       66.66667

我尝试做的是获取以下格式的数据框:

> head(k)
V1         V2        
1417621083 71.72778  
1417621204 70.76860  
1417621384 70.92222  
1417621564 70.84444  

但我最终得到的是下面的结果:unix 时间戳变成了行名,而不是单独的一列,我不确定如何把它分离到另一个字段中:

# Asker's attempt: pull one element out of `k` (judging by the output below,
# the `dps` values) and transpose it into a single-column data frame.
try <- k[[3]]
# t() turns the former column names (the timestamps) into row names, so they
# are no longer available as a regular column.
try <- as.data.frame(t(try))
> head(try)
           1        
1417621083 71.72778  
1417621204 70.76860  
1417621384 70.92222  
1417621564 70.84444  

> colnames(try)
[1] "1"
colnames(try) <- c("one")

> data.frame(do.call('rbind', strsplit(as.character(try$one),' ',fixed=TRUE)))
   do.call..rbind...strsplit.as.character.try.one........fixed...TRUE..
1                                                      71.7277777777778
2                                                      70.7685950413223
3                                                      70.9222222222222
4                                                      70.8444444444444

library(stringr)
str_split_fixed(try$one, " ", 2)
      [,1]               [,2]
 [1,] "71.7277777777778" ""  
 [2,] "70.7685950413223" ""  
 [3,] "70.9222222222222" ""  
 [4,] "70.8444444444444" ""  
 [5,] "71.3220338983051" ""  

如果我在这里错过了什么,有人可以帮忙吗?

1 个答案:

答案 0 :(得分:1)

你可以尝试

 # Flatten the one-row `dps` data frame into a named numeric vector:
 # the names are the Unix timestamps, the values are the measurements.
 dps_values <- unlist(k$dps)

 # Build the two-column result. Dropping the names from the value vector
 # keeps the timestamps from being reused as row names.
 k1 <- data.frame(
   V1 = names(dps_values),
   V2 = unname(dps_values),
   stringsAsFactors = FALSE
 )
 head(k1)
 #        V1       V2
 #1 1417621083 71.72778
 #2 1417621204 70.76860
 #3 1417621384 70.92222
 #4 1417621564 70.84444
 #5 1417621623 71.32203
 #6 1417621803 70.92778

 str(k1)
 #'data.frame': 25 obs. of  2 variables:
 #$ V1: chr  "1417621083" "1417621204" "1417621384" "1417621564" ...
 #$ V2: num  71.7 70.8 70.9 70.8 71.3 ...

更新

或者您可以使用stack并更改列名。

  # stack() returns a data frame with the numbers in `values` and the
  # original column names (the timestamps) in `ind`; rename them as needed.
  stack(k$dps)