我有一个名为“features.txt”的文本文件,其中有27000行,如下所示
E.g: first_name;30 last_name;0 height;666 gender;2 //line 1
height;10 first_name;2333 last_name;66 // line 2
我还有另一个文件featureList.txt
E.g: first_name //line 1
height //line 2
last_name //line 3
gender //line 4
我想要的是“features.csv”
first_name height last_name gender //列名称按“featureList.txt”中的顺序排列
30 666 0 2
2333 10 66 NA
这就是我所拥有的:
# Work relative to the project folder that holds both input files.
setwd("D:/r/social")
library(stringr)
# One element per input line; each line holds space-separated "name;value" tokens.
features <- readLines("features.txt")
# One feature name per line, in the column order wanted for the output.
featuresList <- readLines("featureList.txt")
# readLines() already returns plain character vectors, so the former round
# trip through textConnection() added nothing and left two connections open.
features1 <- features
featureList1 <- featuresList
# Extract the value recorded for one feature on every input line.
#
# x: feature name (a single string, used as a regular expression fragment),
#    e.g. "height".
# y: character vector of lines made of space-separated "name;value" tokens;
#    defaults to the global `features`.
#
# Returns a character vector of the same length as `y` holding the digits that
# follow "x;" on each line, or NA where the line does not carry that feature.
fun <- function(x, y = features){
  # Anchor on line start or whitespace so that e.g. "name" cannot match inside
  # "first_name".
  pattern <- paste0("(^|\\s)", x, ";\\d+")
  hit <- regexpr(pattern, y)
  found <- !is.na(hit) & hit != -1
  # regmatches() silently drops non-matching lines, so start from an all-NA
  # vector and fill in only the positions that matched; this keeps every
  # feature column aligned with the input lines.
  res <- rep(NA_character_, length(y))
  # Keep only what follows the ';' so digits in the feature name are ignored.
  res[found] <- sub(".*;", "", regmatches(y, hit))
  res
}
# Pull every listed feature out of all lines: one vector per feature name.
res <- lapply(featureList1, fun)
names(res) <- featureList1
# Convert the list built above (the former `result` was never defined).
res <- as.data.frame(res)
res
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(res, "D:/r/social/features.csv", row.names = FALSE)
我得到的输出是
Id Choice.X0 Choice.X1
1 0.854405224 0.145594776
2 0.200898215 0.799101785
答案 0 :(得分:1)
library(tidyr)
# read in (you can also use readr::read_csv)
# Lines may carry different numbers of "name;value" tokens, so size the table
# from the widest line and let fill = TRUE pad the shorter ones instead of
# failing with "line N did not have K elements".
n_tokens <- max(count.fields("features.txt"), na.rm = TRUE)
d <- read.table("features.txt", header = FALSE, fill = TRUE,
                col.names = paste0("V", seq_len(n_tokens)),
                stringsAsFactors = FALSE)
# add id so we can fold back later
d$id <- seq_len(nrow(d))
# separate columns, gather results in a list
# Select the token columns by name rather than by a hard-coded position, so
# the code works for any number of tokens per line.
token_cols <- setdiff(names(d), "id")
L <- lapply(token_cols, function(col) {
  piece <- d[c("id", col)]
  # Drop the empty padding cells that fill = TRUE adds to short lines.
  piece <- piece[!is.na(piece[[col]]) & nzchar(piece[[col]]), ]
  separate_(piece, col, into = c("variable", "value"), sep = ";")
})
# you can also use dplyr::bind_rows
d1 <- do.call(rbind, L)
# reshape to wide form; features absent from a line come out as NA
spread(d1, variable, value)
答案 1 :(得分:0)
在下文中,我使用textConnection创建向量features和featureList,而不是从磁盘文件中读取它们。至于代码,我所要做的就是纠正正则表达式中的一个小错误(添加一个';')并将其变成一个函数。
# In-memory stand-in for features.txt: one line of "name;value" tokens per row.
txt1 <-
"first_name;30 last_name;0 height;666
height;10 first_name;2333 last_name;66"
# In-memory stand-in for featureList.txt: one feature name per line.
txt2 <-
"first_name
height
last_name"
# Read each text block through a connection and close it afterwards, so no
# connection is left open.
con <- textConnection(txt1, open = "r")
features <- readLines(con)
close(con)
con <- textConnection(txt2, open = "r")
featureList <- readLines(con)
close(con)
# Extract the value recorded for one feature on every input line.
#
# x: feature name (a single string, used as a regular expression fragment),
#    e.g. "height".
# y: character vector of lines made of space-separated "name;value" tokens;
#    defaults to the global `features`.
#
# Returns a character vector of the same length as `y` holding the digits that
# follow "x;" on each line, or NA where the line does not carry that feature.
fun <- function(x, y = features){
  # Anchor on line start or whitespace so that e.g. "name" cannot match inside
  # "first_name".
  pattern <- paste0("(^|\\s)", x, ";\\d+")
  hit <- regexpr(pattern, y)
  found <- !is.na(hit) & hit != -1
  # regmatches() silently drops non-matching lines, so start from an all-NA
  # vector and fill in only the positions that matched; this keeps every
  # feature column aligned with the input lines.
  res <- rep(NA_character_, length(y))
  # Keep only what follows the ';' so digits in the feature name are ignored.
  res[found] <- sub(".*;", "", regmatches(y, hit))
  res
}
# Run the extractor once per feature name, label each result with its feature,
# and bind the per-feature vectors into the columns of a data frame.
per_feature <- lapply(featureList, fun)
res <- as.data.frame(stats::setNames(per_feature, featureList))
res
first_name height last_name
1 30 666 0
2 2333 10 66
# Write the table without row names; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
write.csv(res, "D:/r/social/features.csv", row.names = FALSE)