我有一个文本文件,内容如下:
#------------------
# CONTENTS OF TEXT
#------------------
H01, H04, G02, G06,
H01, H02, G02, H05,
G01, H04, H01
G09, G05
我想将这些数据转换为二进制(0/1)矩阵:每一行对应文本中的一行,每一列对应一个代码。我希望输出如下:
H01 H02 H04 H05 G01 G02 G05 G06 G09
1 0 1 0 0 1 0 1 0
1 1 0 1 0 1 0 0 0
1 0 1 0 1 0 0 0 0
0 0 0 0 0 0 1 0 1
请帮忙
答案 0 :(得分:3)
你可以这样做:
# Sample input: one record per line, codes separated by commas (a trailing
# comma at the end of a line is tolerated).
txt <- "H01, H04, G02, G06,
H01, H02, G02, H05,
G01, H04, H01
G09, G05"

# Keep the raw lines in a one-column data frame (column V1, one row per line).
# Splitting on "\n" replaces read.table(sep = "§"): a multi-byte separator is
# rejected by scan() in UTF-8 sessions ("invalid 'sep' value: must be one
# byte").
d <- data.frame(
  V1 = strsplit(txt, "\n", fixed = TRUE)[[1]],
  stringsAsFactors = FALSE
)

# Split every line into its codes, trim surrounding blanks and drop empty
# fields (e.g. the one produced by a stray comma).
tokens <- lapply(strsplit(d$V1, ",", fixed = TRUE), function(x) {
  x <- trimws(x)
  x[nzchar(x)]
})

# Sorted set of all distinct codes; these become the matrix columns.
s <- sort(unique(unlist(tokens)))

# One row per input line, one column per code. %in% compares whole codes, so
# "H01" is not reported for a line that only contains e.g. "H010" (which a
# fixed-string grepl() would do). rbind() always yields a matrix, even when
# there is a single line or a single code.
m <- do.call(rbind, lapply(tokens, function(x) s %in% x))
colnames(m) <- s
# > m
# G01 G02 G05 G06 G09 H01 H02 H04 H05
# [1,] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE
# [2,] FALSE TRUE FALSE FALSE FALSE TRUE TRUE FALSE TRUE
# [3,] TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
# [4,] FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE

# Convert TRUE/FALSE to 1/0 in place; dimensions and column names are kept.
storage.mode(m) <- "integer"
# > m
# G01 G02 G05 G06 G09 H01 H02 H04 H05
# [1,] 0 1 0 1 0 1 0 1 0
# [2,] 0 1 0 0 0 1 1 0 1
# [3,] 1 0 0 0 0 1 0 1 0
# [4,] 0 0 1 0 1 0 0 0 0
答案 1 :(得分:1)
另一种做法,沿用 @jogo 答案中的数据框 `d`:
library(dplyr)
library(tidyr)

# Build a wide 0/1 table from `d` (one input line per row, raw text in V1):
#   1. number the lines explicitly (`id`), because unnest()'s `.id` argument
#      is deprecated since tidyr 1.0 and can no longer be relied on to add it;
#   2. split each line into its codes (list-column) and flag each with V2 = 1;
#   3. expand to one row per (line, code) pair, dropping repeated codes within
#      a line so every pair is unique;
#   4. pivot the codes into columns, filling absent codes with 0.
d %>%
  mutate(
    id = row_number(),
    V1 = stringi::stri_extract_all_words(V1),
    V2 = 1
  ) %>%
  unnest(V1) %>%
  distinct() %>%
  pivot_wider(
    names_from = V1,
    values_from = V2,
    values_fill = 0,
    names_sort = TRUE # alphabetical columns, as spread() produced
  )
输出结果为:
# id G01 G02 G05 G06 G09 H01 H02 H04 H05
#1 1 0 1 0 1 0 1 0 1 0
#2 2 0 1 0 0 0 1 1 0 1
#3 3 1 0 0 0 0 1 0 1 0
#4 4 0 0 1 0 1 0 0 0 0