我有以下格式的csv文件,我想将其读入RStudio。我面临的问题是,同一文件中包含多个类似表格的数据块,它们之间用尖括号标签(如<header>)分隔。
我想将<header>、<member details>等标签下的每个表分别读入R中的单独对象。我该怎么做?
请注意,<header>、<member details>等标签在文件中可能出现多次,并且<total interest>标签出现在每个<member details>表之后,显示该<member details>表中interest的总和。
数据
<member details>
答案 0 :(得分:1)
library(stringr)
library(data.table)

#' Read a text file made of several "<tag>"-delimited CSV tables.
#'
#' Each section starts with a line beginning with `<tag>`, followed by one
#' comma-separated header row and zero or more comma-separated data rows.
#' Blank and whitespace-only lines are ignored.
#'
#' @param path Path to the file to read.
#' @return A list of data.tables, one per tag in file order, named by the tag
#'   text without the angle brackets (names repeat when a tag repeats). All
#'   columns are character; fields are trimmed of surrounding whitespace.
#'   A tag with no header row yields an empty data.table.
read_tagged_tables <- function(path) {
  lines <- readLines(path)
  # Logical subsetting on purpose: `lines[-which(blank)]` would drop every
  # line when the file has no blank lines (negative index with integer(0)).
  lines <- lines[!str_detect(lines, "^\\s*$")]

  # Only lines that *start* with a tag open a section, so a data field that
  # happens to contain "<...>" is not mistaken for one.
  tag_at <- which(str_detect(lines, "^\\s*<.*>"))
  first <- tag_at + 1L
  # Subset by seq_along() so that a file without any tag gives zero sections.
  last <- c(tag_at[-1] - 1L, length(lines))[seq_along(tag_at)]

  split_fields <- function(line) {
    str_trim(str_split(line, ",")[[1]])
  }

  tables <- Map(function(from, to) {
    if (from > to) {
      # Tag immediately followed by another tag or by the end of the file.
      return(data.table())
    }
    col_names <- split_fields(lines[from])
    # seq_len() keeps this empty for a header-only section; `a:b` would
    # count downwards and pick up the wrong lines.
    body <- lines[from + seq_len(to - from)]
    if (length(body) == 0) {
      tbl <- as.data.table(rep(list(character(0)), length(col_names)))
    } else {
      tbl <- rbindlist(
        lapply(body, function(line) as.list(split_fields(line)))
      )
    }
    setnames(tbl, col_names)
    tbl
  }, first, last)

  names(tables) <- str_trim(str_replace_all(lines[tag_at], "[<>]", ""))
  tables
}

# Sample input written to a temporary file to stand in for the real csv.
txt <- "
<header>
id, cust_name
28, Mr.X
<member details>
Account_No,date,balance,interest
12345,23/12/2018,200000,12.0
12345,25/12/2018,300000,13.0
<total interest>
credit, settlement_acc
25,98765
<member details>
Account_No,date,balance,interest
6789,23/12/2018,200000,19.0
6789,25/12/2018,300000,12.0
<total interest>
credit, settlement_acc
31,98765
"
fl <- tempfile()
cat(txt, file = fl)

mydata <- read_tagged_tables(fl)
print(mydata)