我正在尝试从此csv文件中提取数据 -
"NA,NA,NA,\"KMI for Lessons Learnt\",NA,\"0\",\"0\",\"7\",\"7\",\"0\",\"7\",\"7\",\"7\""
"NA,NA,NA,\"KMI for Knowledge Documents\",NA,\"0\",\"0\",\"7\",\"10\",\"0\",\"4.1999999999999993\",\"10\",\"10\""
"(Annual Basis)\",NA,\"KMI for Innovation (Overall for IPR & MIA/MRA)\",NA,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",\"8.5399999999999991\""
"NA,NA,NA,\"KMI for IPR\",NA,\"10\",\"10\",\"0\",\"3\",\"10\",\"10\",\"3\",\"8.6\""
"NA,NA,NA,\"KMI for MIA/MRA\",NA,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",\"8.1999999999999993\""
"NA,NA,NA,\"KMI for Knowledge gained from ext. experts\",NA,\"10\",\"0\",\"0\",\"0\",\"10\",\"10\",\"0\",\"10\""
"NA,NA,NA,\"KMI for Technical papers publication\",NA,\"10\",\"0\",\"10\",\"0\",\"10\",\"10\",\"10\",\"10\""
"NA,NA,NA,\"Overall KMI\",NA,\"5.5\",\"2.5\",\"4.6500000000000004\",\"4.5\",\"5.5\",\"8.09\",\"6\",\"8.8849999999999998\""
我只想要行名称和数值,并跳过NA字符串和连字符。输出应该像这样 -
\"KMI for Lessons Learnt\",\"0\",\"0\",\"7\",\"7\",\"0\",\"7\",\"7\",\"7\""
也就是从“KMI”字符串开始读取,并删除所有出现的NA和连字符。
任何帮助都将不胜感激。
感谢。
我已经编辑了这个CSV文件。希望它看起来不像以前那么难看。
NA,NA,NA,KMI for Lessons Learnt,NA,0,0,7,7,0,7,7,7
NA,NA,NA,KMI for Knowledge Documents,NA,0,0,7,10,0,4.1999999999999993,10,10
(Annual Basis),NA,KMI for Innovation (Overall for IPR & MIA/MRA),NA,NA,NA,NA,NA,NA,NA,NA,8.5399999999999991
NA,NA,NA,KMI for IPR,NA,10,10,0,3,10,10,3,8.6
NA,NA,NA,KMI for MIA/MRA,NA,NA,NA,NA,NA,NA,NA,NA,8.1999999999999993
NA,NA,NA,KMI for Knowledge gained from ext. experts,NA,10,0,0,0,10,10,0,10
NA,NA,NA,KMI for Technical papers publication,NA,10,0,10,0,10,10,10,10
NA,NA,NA,Overall KMI ,NA,5.5,2.5,4.6500000000000004,4.5,5.5,8.09,6,8.8849999999999998
答案 0 :(得分:1)
这很难看,但数据也是如此:
library(data.table)
# Read the raw file into a character vector, one element per line, so each
# line can be cleaned individually before parsing.
x <- readLines('testFile.csv')
# Turn one raw line of the export into a one-row data frame of its useful
# fields.
#
# The line is stripped of backslashes and double quotes and split on commas.
# Everything before the first field that contains "KMI" (the row label) is
# discarded, and fields that are exactly "NA" are dropped. Hyphen placeholders
# are kept so that every row keeps the same number of columns when the rows
# are stacked afterwards.
#
# line: a single character string holding one line of the file.
# Returns a one-row data.frame of character columns named V1, V2, ...
cleanLines <- function(line) {
  # Remove escaping noise with fixed-string replacements (no regex needed).
  text <- gsub("\\", "", line, fixed = TRUE)
  text <- gsub('"', "", text, fixed = TRUE)

  fields <- strsplit(text, ",", fixed = TRUE)[[1]]

  # Locate the label by field rather than by a greedy regex on the whole
  # line, so a label such as "Overall KMI" is kept intact instead of being
  # truncated to "KMI".
  label_at <- grep("KMI", fields, fixed = TRUE)
  if (length(label_at) > 0) {
    fields <- fields[seq.int(label_at[1L], length(fields))]
  }

  # Drop only fields that are exactly "NA"; a substring removal of ",NA"
  # would also mangle values that merely start with "NA".
  fields <- fields[fields != "NA"]

  # Passing both dimensions explicitly keeps this working when no fields
  # remain (e.g. an empty line).
  mat <- matrix(fields, nrow = 1L, ncol = length(fields))
  as.data.frame(mat, stringsAsFactors = FALSE)
}
# Clean every raw line into its own one-row data frame ...
parsed <- lapply(X = x, FUN = cleanLines)
# ... and stack those rows into a single table.
df <- rbindlist(l = parsed)
输出df:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: KMI for Lessons Learnt 0 0 7 7 0 7 7 7
2: KMI for Knowledge Documents 0 0 7 10 0 4.1999999999999993 10 10
3: KMI for Innovation (Overall for IPR & MIA/MRA) - - - - - - - 8.5399999999999991
4: KMI for IPR 10 10 0 3 10 10 3 8.6
5: KMI for MIA/MRA - - - - - - - 8.1999999999999993
6: KMI for Knowledge gained from ext. experts 10 0 0 0 10 10 0 10
7: KMI for Technical papers publication 10 0 10 0 10 10 10 10
8: KMI 5.5 2.5 4.6500000000000004 4.5 5.5 8.09 6 8.8849999999999998
答案 1 :(得分:0)
library(stringi)
library(tidyverse)
# Strip the escaping from each raw line, then parse the cleaned text as a
# header-less CSV. The resulting data frame is the value of the expression.
local({
  raw_lines <- read_lines("so.csv")
  no_backslashes <- stri_replace_all_fixed(raw_lines, "\\", "")
  # Remove a double quote sitting at the very start or end of each line.
  unwrapped <- stri_replace_all_regex(no_backslashes, '(^"|"$)', '')
  csv_text <- paste0(unwrapped, collapse = "\n")
  read.csv(text = csv_text, header = FALSE, stringsAsFactors = FALSE)
})
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## 1 NA NA NA KMI for Knowledge gained from ext. experts NA 10.0 0.0 0.00 0.0 10.0 10.00 0 10.000
## 2 NA NA NA KMI for Technical papers publication NA 10.0 0.0 10.00 0.0 10.0 10.00 10 10.000
## 3 NA NA NA Overall KMI NA 5.5 2.5 4.65 4.5 5.5 8.09 6 8.885