使用R将data.frame导出到Excel/CSV时出错

时间:2016-06-28 19:25:07

标签: r

我的JSON文件如下所示:

/* 0 */
{
  "_id" : "93ccbdb6-8947",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "93ccbdb6-8947",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 1 */
{
  "_id" : "b736c374-b8ae",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "b736c374-b8ae",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 2 */
{
  "_id" : "3312605f-8304",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1SXE",
    "queryId" : "3312605f-8304",
    "subRequests" : [{
        "origin" : "LON",
        "destination" : "IAD",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 2,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 3 */
{
  "_id" : "6b668cfa-9b79",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1NXA",
    "queryId" : "6b668cfa-9b79",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 1,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 4 */
{
  "_id" : "41c373a1-e4cb",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP6CXS",
    "queryId" : "41c373a1-e4cb",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 5 */
{
  "_id" : "2c8331c4-21ca",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "2c8331c4-21ca",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 6 */
{
  "_id" : "71a09900-1c13",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP6CXS",
    "queryId" : "71a09900-1c13",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AF",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "DL",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "LH",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "BA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 6941,
  "status" : "SUCCESS"
}

/* 7 */
{
  "_id" : "a036a42a-918b",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1MMM",
    "queryId" : "a036a42a-918b",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 8 */
{
  "_id" : "c547be36-805c",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1SXB",
    "queryId" : "c547be36-805c",
    "subRequests" : [{
        "origin" : "CHI",
        "destination" : "LON",
        "carrier" : "BA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 2,
  "requestDate" : 20151205,
  "totalRecords" : 1072,
  "status" : "SUCCESS"
}

我的代码如下:

# Read the raw export line by line. The file is a sequence of JSON objects
# separated by "/* N */" marker lines, which is not valid JSON on its own,
# so it is repaired into a single JSON array before parsing.
raw <- readLines("mydata.txt")

# Drop the "/* 0 */"-style marker lines.
json <- grep("^/\\* [0-9]* \\*/", raw, value = TRUE, invert = TRUE)

# Add the missing comma after every top-level closing brace except the last
# one. The brace lines are located explicitly (instead of just skipping the
# final line of the file) so the result stays valid JSON even when the file
# ends with blank lines after the last object.
closing <- grep("^}$", json)
json[head(closing, -1)] <- "},"

# Add brackets at the beginning and end so the objects form one JSON array.
json <- c("[", json, "]")


# Parse the repaired lines and flatten the result before exporting it.
library(jsonlite)
# NOTE(review): the name `table` masks base::table() for the rest of the session.
table <- fromJSON(json)

# flatten() turns the nested "uiSearchRequest" fields into
# "uiSearchRequest.*" columns. NOTE(review): the "subRequests" array appears
# to remain a list column of data frames -- confirm with str(final).
final <- flatten(table)
# NOTE(review): as.data.frame.matrix() is the method intended for matrices;
# `final` is presumably already a data.frame, so this call looks unnecessary.
final1 <- as.data.frame.matrix(final)

# Console check of the resulting class; the next line is the printed output.
class(final1)
[1] "data.frame"

# NOTE(review): write.xlsx() is not part of jsonlite; presumably the xlsx
# package was attached elsewhere, since no library() call for it is visible.
# Presumably this is the call that raises the reported setCellValue error.
write.xlsx(final1, file="JSON2excel.xlsx",row.names = FALSE)
file.show("JSON2excel.xlsx")

我收到以下错误:

Error in .jcall(cell, "V", "setCellValue", value) : 
  method setCellValue with signature ([Ljava/lang/String;)V not found
In addition: Warning message:
In if (is.na(value)) { :
  the condition has length > 1 and only the first element will be used

我浏览了多个具有相同类型错误的stackoverflow问题,但解决方案在我的情况下不起作用。任何帮助表示赞赏。

1 个答案:

答案 0 :(得分:1)

出错的原因是:最终得到的数据框中仍然含有嵌套的数据框,它们来自JSON文件中嵌套的subRequests(子请求)部分,可以用str(final1)查看其结构。因此,无论是基础函数write.table()、write.csv(),还是xlsx包的write.xlsx(),都无法把它输出为平面(二维)表格。

可以这样展平:先把各条记录的子请求数据框逐行绑定在一起(同时带上对应记录的id),再按id把它们与final1中其余的顶层列合并。最终会得到一个含13个观测值的数据框,而不是JSON中的9个元素,因为其中一条记录(id = 71a09900-1c13)包含5个嵌套的子请求。

# Approach 1: build one row per sub-request, then merge those rows with the
# top-level fields of each record.

# SUBREQUEST BINDING (PULLING CORRESPONDING ID)
# Each record's nested sub-request data frame is prefixed with the record's
# `_id`; the scalar id is recycled across that record's sub-request rows.
# seq_len() yields an empty sequence when final1 has no rows, whereas
# 1:nrow(final1) would iterate over c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  data.frame(
    id = final1$`_id`[[i]],
    final1$uiSearchRequest.subRequests[[i]],
    stringsAsFactors = FALSE,  # keep id as character so the merge key matches
    check.names = FALSE
  )
})

# USE DPLYR'S bind_rows() IF dfs DIFFER IN NUMBER OF COLUMNS
# (called through its namespace so the snippet does not rely on dplyr
# having been attached beforehand)
subdf <- dplyr::bind_rows(dfList)
# subdf <-  data.frame(do.call(rbind, dfList))

# FINAL1 EXTRACTION
# One row per record, holding only the flat (non-nested) columns.
fdf <- data.frame(
  id = final1$`_id`,
  travelDate = final1$uiSearchRequest.travelDate,
  travelDuration = final1$uiSearchRequest.travelDuration,
  shopperDuration = final1$uiSearchRequest.shopperDuration,
  oneway = final1$uiSearchRequest.oneWay,
  userId = final1$uiSearchRequest.userId,
  queryId = final1$uiSearchRequest.queryId,
  downloadCount = final1$downloadCount,
  requestDate = final1$requestDate,
  totalRecords = final1$totalRecords,
  status = final1$status,

  stringsAsFactors = FALSE,
  row.names = NULL
)

# MERGE
# Joining on id repeats each record's flat columns once per sub-request.
finaldf <- merge(fdf, subdf, by = "id")

或者,您可以按行迭代绑定:

# Approach 2: build the flat rows record by record. Inside data.frame() the
# scalar top-level fields are recycled to the number of rows of that record's
# sub-request data frame, giving one output row per sub-request.
# seq_len() yields an empty sequence when final1 has no rows, whereas
# 1:nrow(final1) would iterate over c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  data.frame(
    id = final1$`_id`[[i]],
    travelDate = final1$uiSearchRequest.travelDate[[i]],
    travelDuration = final1$uiSearchRequest.travelDuration[[i]],
    shopperDuration = final1$uiSearchRequest.shopperDuration[[i]],
    oneway = final1$uiSearchRequest.oneWay[[i]],
    userId = final1$uiSearchRequest.userId[[i]],
    queryId = final1$uiSearchRequest.queryId[[i]],
    final1$uiSearchRequest.subRequests[[i]],
    downloadCount = final1$downloadCount[[i]],
    requestDate = final1$requestDate[[i]],
    totalRecords = final1$totalRecords[[i]],
    status = final1$status[[i]],

    stringsAsFactors = FALSE,
    row.names = NULL
  )
})

# Stack the per-record data frames into one. bind_rows() takes the list
# directly, so no do.call() is needed; with identical columns in every
# element, do.call(rbind, dfList) is a base-R equivalent.
finaldf <- dplyr::bind_rows(dfList)