如何将以下因子转换为数据框?
dput(d)
structure(1L, .Label = "c(\"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\", \"CONTEXTLESS\"), c(\"app1\", \"client\", \"org\", \"app1\", \"DATA_CENTER\", \"PURPOSE\", \"REGION\", \"Test\"), c(NA, \"NONE\", \"Host:Environment:test123\", \"111\", \"222\", \"GENERAL\", \"444\", \"555\")", class = "factor")
> d
[1] c("CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS"), c("app1", "client", "org", "app1", "DATA_CENTER", "PURPOSE", "REGION", "Test"), c(NA, "NONE", "Host:Environment:test123", "111", "222", "GENERAL", "444", "555")
Levels: c("CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS", "CONTEXTLESS"), c("app1", "client", "org", "app1", "DATA_CENTER", "PURPOSE", "REGION", "Test"), c(NA, "NONE", "Host:Environment:test123", "111", "222", "GENERAL", "444", "555")
答案 0 :(得分:2)
可以试试下面的方法,并告诉我它是否适合您。您可以先把因子强制转换为字符,然后在字符串中各个向量之间放置一个公共分隔符。代码如下:
# Code
# Take the factor's single label as plain text.
label_text <- as.character(d)
# Replace every ", c" (the boundary between vectors) with a "*" marker ...
marked_text <- gsub(', c', '*c', label_text)
# ... then split on that marker, giving a one-element list of pieces.
d1 <- strsplit(marked_text, split = '\\*')
# Bind the pieces into a one-column data frame.
df <- as.data.frame(do.call(cbind, d1))
df 的输出如下所示(只有一列,每个 c(...) 字符串各占一行):
更新:在获得一些新的详细信息之后,可以先清理这些字符串,再使用 tidyverse 把它们拆分成行:
library(tidyverse)
# Code
# Split the factor's label into one string per c(...) group.
d1 <- as.character(d)
d1 <- gsub(', c', '*c', d1)
d1 <- strsplit(d1, split = '\\*')
# Spell out FALSE: T and F are ordinary variables that can be reassigned.
df <- as.data.frame(do.call(cbind, d1), stringsAsFactors = FALSE)
# Clean
# Strip the quotes and the c( ... ) wrapper so that only comma-separated
# values remain. The parentheses are matched literally (fixed = TRUE) rather
# than as regular-expression metacharacters.
df$V1 <- gsub('\"', '', df$V1)
df$V1 <- gsub("c(", "", df$V1, fixed = TRUE)
df$V1 <- gsub(")", "", df$V1, fixed = TRUE)
# Format
# One row per value, numbered within each original row (id), then spread wide
# into Var.1, Var.2, ... columns.
# Note: an unquoted NA in the label ends up as the text "NA", and the result
# stays grouped by id because no ungroup() follows group_by().
newdf <- df %>%
  mutate(id = row_number()) %>%
  separate_rows(V1, sep = ',') %>%
  mutate(V1 = trimws(V1)) %>%
  group_by(id) %>%
  mutate(Var = paste0('Var.', row_number())) %>%
  pivot_wider(names_from = Var, values_from = V1)
输出:
# A tibble: 3 x 9
# Groups: id [3]
id Var.1 Var.2 Var.3 Var.4 Var.5 Var.6 Var.7 Var.8
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 CONTEXTLE~ CONTEXTLE~ CONTEXTLESS CONTEXTL~ CONTEXTL~ CONTEXTL~ CONTEXTL~ CONTEXTL~
2 2 app1 client org app1 DATA_CEN~ PURPOSE REGION Test
3 3 NA NONE Host:Environment:t~ 111 222 GENERAL 444 555
答案 1 :(得分:1)
如果从 Duck 的前三行代码开始,就可以解析这段(显然)经过 deparse() 处理的 R 代码:
# Code
# Label of the factor as a plain string.
raw_label <- as.character(d)
# Put a "*" marker at every ", c" boundary, then split on the marker.
delimited <- gsub(', c', '*c', raw_label)
d1 <- strsplit(delimited, split = '\\*')
# Look at the second piece: it is the text of an R c(...) call.
d1[[1]][2]
#[1] "c(\"app1\", \"client\", \"org\", \"app1\", \"DATA_CENTER\", \"PURPOSE\", \"REGION\", \"Test\")"
如果使用 lapply 对每个元素依次应用 eval(parse(text = .)),就会得到一个对象,它可以被强制转换为您在评论中提到想要的长格式:
# Evaluate each "c(...)" string as R code to recover the vector it describes,
# and name the results V1, V2, ... (one name per piece, however many there are).
# NOTE: eval(parse()) executes whatever the text contains; only use it on
# input you trust.
setNames(
  lapply(d1[[1]], function(x) eval(parse(text = x))),
  paste0("V", seq_along(d1[[1]]))
)
$V1
[1] "CONTEXTLESS" "CONTEXTLESS" "CONTEXTLESS" "CONTEXTLESS" "CONTEXTLESS" "CONTEXTLESS"
[7] "CONTEXTLESS" "CONTEXTLESS"
$V2
[1] "app1" "client" "org" "app1" "DATA_CENTER" "PURPOSE"
[7] "REGION" "Test"
$V3
[1] NA "NONE" "Host:Environment:test123"
[4] "111" "222" "GENERAL"
[7] "444" "555"
因此:
> d2 <- setNames(lapply(d1[[1]], function(x) eval(parse(text=x))),paste0("V", 1:3))
> data.frame( d2[2:3])
V2 V3
1 app1 <NA>
2 client NONE
3 org Host:Environment:test123
4 app1 111
5 DATA_CENTER 222
6 PURPOSE GENERAL
7 REGION 444
8 Test 555
答案 2 :(得分:1)
只需把这段文本放进 data.frame(...) 中,再用 parse 和 eval 求值即可。无需重新发明轮子。
# Wrap the factor's label in a data.frame() call and evaluate that text, so
# each c(...) vector becomes one column. stringsAsFactors = FALSE keeps the
# columns as character on every R version (it is only the default from 4.0.0).
# NOTE: eval(parse()) executes whatever the text contains; only use it on
# input you trust.
df <- eval(parse(text = paste0(
  "data.frame(", as.character(d), ", stringsAsFactors = FALSE)"
)))
# Replace the auto-generated column names with v1, v2, ...
names(df) <- paste0("v", seq_along(df))
输出
v1 v2 v3
1 CONTEXTLESS app1 <NA>
2 CONTEXTLESS client NONE
3 CONTEXTLESS org Host:Environment:test123
4 CONTEXTLESS app1 111
5 CONTEXTLESS DATA_CENTER 222
6 CONTEXTLESS PURPOSE GENERAL
7 CONTEXTLESS REGION 444
8 CONTEXTLESS Test 555
答案 3 :(得分:0)
我们可以使用 str_match_all 提取每个 c(....) 括号内的所有内容,按逗号分割字符串,再把结果按行绑定在一起。
# Capture the text inside every c(...) group of the label.
matches <- stringr::str_match_all(d, 'c\\((.*?)\\)')
# Column 2 of each match matrix holds the captured text: drop the quotes,
# split on commas and stack the resulting vectors as rows.
rows_from_match <- function(m) {
  unquoted <- gsub('"', '', m[, 2])
  do.call(rbind, strsplit(unquoted, ',\\s*'))
}
do.call(rbind, lapply(matches, rows_from_match))