我的JSON文件如下所示:
/* 0 */
{
"_id" : "93ccbdb6-8947",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1KKP",
"queryId" : "93ccbdb6-8947",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 0,
"status" : "SUCCESS"
}
/* 1 */
{
"_id" : "b736c374-b8ae",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1KKP",
"queryId" : "b736c374-b8ae",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 0,
"status" : "SUCCESS"
}
/* 2 */
{
"_id" : "3312605f-8304",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1SXE",
"queryId" : "3312605f-8304",
"subRequests" : [{
"origin" : "LON",
"destination" : "IAD",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 2,
"requestDate" : 20151205,
"totalRecords" : 0,
"status" : "SUCCESS"
}
/* 3 */
{
"_id" : "6b668cfa-9b79",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1NXA",
"queryId" : "6b668cfa-9b79",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 1,
"requestDate" : 20151205,
"totalRecords" : 1388,
"status" : "SUCCESS"
}
/* 4 */
{
"_id" : "41c373a1-e4cb",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP6CXS",
"queryId" : "41c373a1-e4cb",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 1388,
"status" : "SUCCESS"
}
/* 5 */
{
"_id" : "2c8331c4-21ca",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1KKP",
"queryId" : "2c8331c4-21ca",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 1388,
"status" : "SUCCESS"
}
/* 6 */
{
"_id" : "71a09900-1c13",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP6CXS",
"queryId" : "71a09900-1c13",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AF",
"fareClasses" : "",
"owrt" : "1,2"
}, {
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}, {
"origin" : "WAS",
"destination" : "LON",
"carrier" : "DL",
"fareClasses" : "",
"owrt" : "1,2"
}, {
"origin" : "WAS",
"destination" : "LON",
"carrier" : "LH",
"fareClasses" : "",
"owrt" : "1,2"
}, {
"origin" : "WAS",
"destination" : "LON",
"carrier" : "BA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 6941,
"status" : "SUCCESS"
}
/* 7 */
{
"_id" : "a036a42a-918b",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1MMM",
"queryId" : "a036a42a-918b",
"subRequests" : [{
"origin" : "WAS",
"destination" : "LON",
"carrier" : "AA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 0,
"requestDate" : 20151205,
"totalRecords" : 1388,
"status" : "SUCCESS"
}
/* 8 */
{
"_id" : "c547be36-805c",
"uiSearchRequest" : {
"travelDate" : 20151206,
"travelDuration" : 7,
"shopperDuration" : 30,
"oneWay" : false,
"userId" : "ATP1SXB",
"queryId" : "c547be36-805c",
"subRequests" : [{
"origin" : "CHI",
"destination" : "LON",
"carrier" : "BA",
"fareClasses" : "",
"owrt" : "1,2"
}]
},
"downloadCount" : 2,
"requestDate" : 20151205,
"totalRecords" : 1072,
"status" : "SUCCESS"
}
我的代码如下:
# Read the export file line by line into a character vector.
raw <- readLines("mydata.txt")
# The file is not valid JSON as-is (records are separated by "/* n */" marker
# lines and have no commas between them), so the next three steps repair it.
# Drop the "/* 0 */"-style marker lines (invert = TRUE keeps non-matching lines).
json <- grep("^/\\* [0-9]* \\*/", raw, value = TRUE, invert = TRUE)
# Add the missing comma after each line that is exactly "}" (the end of a
# top-level record); the last line is excluded so no trailing comma is added.
n <- length(json)
json[-n] <- gsub("^}$", "},", json[-n])
# Add brackets at the beginning and end so the records form one JSON array.
json <- c("[", json, "]")
library(jsonlite)
# Parse the repaired lines into an R object.
table <- fromJSON(json)
# Flatten the parsed result; NOTE(review): per the answer in this thread, the
# subRequests column still holds nested data frames after this step.
final <- flatten(table)
final1 <- as.data.frame.matrix(final)
# Inspect the class of the result; the next line is the pasted console output.
class(final1)
[1] "data.frame"
# Write the result to an Excel file. NOTE(review): write.xlsx is presumably
# from the xlsx package, which is not loaded in this snippet -- confirm.
# This is the call the reported setCellValue error is attributed to.
write.xlsx(final1, file="JSON2excel.xlsx",row.names = FALSE)
# Show the written file.
file.show("JSON2excel.xlsx")
我收到以下错误:
Error in .jcall(cell, "V", "setCellValue", value) :
method setCellValue with signature ([Ljava/lang/String;)V not found
In addition: Warning message:
In if (is.na(value)) { :
the condition has length > 1 and only the first element will be used
我浏览了多个具有相同类型错误的stackoverflow问题,但解决方案在我的情况下不起作用。任何帮助表示赞赏。
答案 0 :(得分:1)
错误的原因是:您最终得到的数据框中包含嵌套的数据框,它们来自JSON文件中嵌套的subRequests部分。您可以使用 `str(final1)` 查看其结构。因此,无论是基础函数 `write.table()` 和 `write.csv()`,还是xlsx软件包的 `write.xlsx()`,都无法将其输出为平面格式。
可以考虑这样展平:先把各条记录的子请求数据框按行绑定在一起,再利用每行的id变量将它们与final1中其余的列合并。最终您会得到一个包含13个观测值的数据框(而不是JSON中的9个元素,因为其中一个元素(id = 71a09900-1c13)包含5个嵌套的子请求,即 8 + 5 = 13 行)。
# SUBREQUEST BINDING (PULLING CORRESPONDING ID)
# Build one data frame per record: that record's nested subRequests rows,
# each tagged with the parent `_id` so they can be merged back afterwards.
# seq_len() is used instead of 1:nrow() so that an empty final1 produces no
# iterations rather than the indices c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  cbind(
    id = final1$`_id`[[i]],
    final1$uiSearchRequest.subRequests[[i]]
  )
})
# USE DPLYR'S bind_rows() IF dfs DIFFER IN NUMBER OF COLUMNS
# Namespace-qualified so the call works without a prior library(dplyr).
subdf <- dplyr::bind_rows(dfList)
# Base-R alternative when every sub-request data frame has the same columns:
# subdf <- data.frame(do.call(rbind, dfList))
# FINAL1 EXTRACTION
# Copy the flat (non-nested) columns of final1 into a plain data frame with
# short column names; one row per record.
fdf <- data.frame(
  id = final1$`_id`,
  travelDate = final1$uiSearchRequest.travelDate,
  travelDuration = final1$uiSearchRequest.travelDuration,
  shopperDuration = final1$uiSearchRequest.shopperDuration,
  oneway = final1$uiSearchRequest.oneWay,
  userId = final1$uiSearchRequest.userId,
  queryId = final1$uiSearchRequest.queryId,
  downloadCount = final1$downloadCount,
  requestDate = final1$requestDate,
  totalRecords = final1$totalRecords,
  status = final1$status,
  stringsAsFactors = FALSE,
  row.names = NULL
)
# MERGE
# Join record-level columns to sub-request rows on id, so each record's
# fields are repeated once per sub-request it contains.
finaldf <- merge(fdf, subdf, by = "id")
或者,您可以按行迭代绑定:
# Build one flat data frame per record: the record-level scalar fields are
# recycled across that record's nested subRequests rows, whose columns are
# spliced in by passing the nested data frame as an unnamed argument.
# seq_len() is used instead of 1:nrow() so that an empty final1 produces no
# iterations rather than the indices c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  data.frame(
    id = final1$`_id`[[i]],
    travelDate = final1$uiSearchRequest.travelDate[[i]],
    travelDuration = final1$uiSearchRequest.travelDuration[[i]],
    shopperDuration = final1$uiSearchRequest.shopperDuration[[i]],
    oneway = final1$uiSearchRequest.oneWay[[i]],
    userId = final1$uiSearchRequest.userId[[i]],
    queryId = final1$uiSearchRequest.queryId[[i]],
    final1$uiSearchRequest.subRequests[[i]],
    downloadCount = final1$downloadCount[[i]],
    requestDate = final1$requestDate[[i]],
    totalRecords = final1$totalRecords[[i]],
    status = final1$status[[i]],
    stringsAsFactors = FALSE,
    row.names = NULL
  )
})
# Stack the per-record data frames into the final flat table.
# The original called `rbind_rows`, which exists in neither base R nor dplyr;
# base rbind() is the intended function here. If the per-record data frames
# could differ in columns, use dplyr::bind_rows(dfList) instead.
finaldf <- do.call(rbind, dfList)