我有一个带反斜杠的CSV文件以逃避某些逗号,如下所示:
c389c3d0-1175-465f-b2bb-1070f24d17a6,eccbc87e4b5ce2fe28308fd9f2a7baf3,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"apples"}\,"accuracy":"yellow"},apples,NULL,NULL,g,189.28,0.62,0.10,0.00,3.64,50.27,8.74,37.82,0.95,389.48,40.04,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL
5df13606-1f78-491f-a4b2-d104f82d030f,c81e728d9d4c2f636f067f89cc14862c,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"oranges"}\,"accuracy":"yellow"},oranges,NULL,NULL,g,137.20,0.42,0.05,0.00,2.80,35.11,6.16,23.80,2.55,464.80,64.40,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL
我试图将文件读入R,但我收到以下错误:
read.table出错(file = file,header = header,sep = sep,quote = quote,: 列数多于列名
有谁知道如何让R读取反斜杠作为转义字符?提前谢谢!
答案 0 :(得分:1)
如果您不想使用某人错误地卡在CSV记录中间的JSON blob,您还可以执行以下操作。它有点重量级,但它可以解决粗糙的中场JSON blob而不会打扰他们:
library(stringi)
library(tidyverse)
stri_read_lines("~/Desktop/a.csv") %>%
map_df(~{
m1 <- stri_match_first_regex(.x, "^([[:alnum:]-\\:, \\.]+)\\{")[,2]
m2 <- stri_match_last_regex(.x, "\\}([[:alnum:]-\\:, \\.]+)")[,2]
c(
stri_split_fixed(m1, ",", simplify=TRUE)[,-1],
stri_replace_first_fixed(.x, m1, "") %>% stri_replace_last_fixed(m2, ""),
stri_split_fixed(m2, ",", simplify=TRUE)[,-1]
) %>%
as.list() %>%
set_names(~sprintf("X%d", 1:length(.x)))
}) %>%
glimpse()
## Observations: 2
## Variables: 32
## $ X1 <chr> "eccbc87e4b5ce2fe28308fd9f2a7baf3", "c81e728d9d4c2f636f067f89cc14862c"
## $ X2 <chr> "", ""
## $ X3 <chr> "{\"parsed_query\":{\"qty\":\"2\"\\,\"unit\":null\\,\"brand\":null\\,\"food\":\"apples\"}\\,\"accuracy\":\"yellow\"}", "{\"parsed_query\":{\...
## $ X4 <chr> "apples", "oranges"
## $ X5 <chr> "NULL", "NULL"
## $ X6 <chr> "NULL", "NULL"
## $ X7 <chr> "g", "g"
## $ X8 <chr> "189.28", "137.20"
## $ X9 <chr> "0.62", "0.42"
## $ X10 <chr> "0.10", "0.05"
## $ X11 <chr> "0.00", "0.00"
## $ X12 <chr> "3.64", "2.80"
## $ X13 <chr> "50.27", "35.11"
## $ X14 <chr> "8.74", "6.16"
## $ X15 <chr> "37.82", "23.80"
## $ X16 <chr> "0.95", "2.55"
## $ X17 <chr> "389.48", "464.80"
## $ X18 <chr> "40.04", "64.40"
## $ X19 <chr> "2015-10-19 21:24:49", "2015-10-19 21:24:49"
## $ X20 <chr> "NULL", "NULL"
## $ X21 <chr> "NULL", "NULL"
## $ X22 <chr> "NULL", "NULL"
## $ X23 <chr> "NULL", "NULL"
## $ X24 <chr> "NULL", "NULL"
## $ X25 <chr> "NULL", "NULL"
## $ X26 <chr> "NULL", "NULL"
## $ X27 <chr> "NULL", "NULL"
## $ X28 <chr> "0", "0"
## $ X29 <chr> "NULL", "NULL"
## $ X30 <chr> "NULL", "NULL"
## $ X31 <chr> "NULL", "NULL"
## $ X32 <chr> "NULL", "NULL"