这是我的数据框的示例列,RR是标题:
RR
Cvv
Cvv
Caa
我需要的是“反转”数据:让子串vv和aa成为列标题,而RR出现在数据框的单元格中。得到的矩阵将是:
vv | aa
CRR |
CRR |
| CRR
因此我们在两个矩阵中都得到了相同的关系。在第一行和第二行,vv与RR耦合。在第三行,aa与RR结合。
这可以通过R实现吗?有什么想法吗?
先谢谢大家了!
我在上面的示例中过度简化了我的数据。所以这是我的实际数据集的样本:
> dput(head(A1F[4:15],n=20))
structure(list(RR = structure(c(15L, 15L, 15L, 27L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
" ", "Caa", "Caj", "Cbb", "Cbb ", "Cbv", "Cja", "Cjr", "Crj",
"Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
AA = structure(c(13L, 13L, 13L, 1L, 1L, 1L, 1L, 15L, 27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 1L), .Label = c("",
"Caa", "Caj", "Car", "Cbb", "Cbv", "Cja", "Cjr", "Cjr ",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb",
"Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"
), class = "factor"), BB = structure(c(9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
), .Label = c("", "?", "Caa", "Caj", "Cbv", "Cja", "Cjr",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
VV = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
" ", "Caa", "Caj", "Caj+", "Cbb", "Cbv", "Cja", "Cjr", "Crv",
"Cvb", "Cvr", "Cvv", "Gaa", "Gbb", "Gja", "Gjr", "Grv", "Gvb",
"Gvr"), class = "factor"), RJ = structure(c(8L, 3L, 3L, 1L,
1L, 12L, 12L, 12L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L), .Label = c("", "Caa", "Caj", "Cbv",
"Ccrj", "Cja", "Cjr", "Crj", "Crj ", "Crr", "Crv", "Cvr",
"Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv",
"Gvr", "Gvv"), class = "factor"), JR = structure(c(7L, 7L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L), .Label = c("", "Caa", "Caj",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv",
"Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Grv ",
"Gvb", "Gvb ", "Gvr", "Gvv"), class = "factor"), BV = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grv", "Gvb", "Gvr",
"Gvv", "R"), class = "factor"), VB = structure(c(1L, 1L,
7L, 7L, 18L, 18L, 1L, 1L, 10L, 10L, 21L, 21L, 21L, 1L, 21L,
21L, 21L, 21L, 21L, 1L), .Label = c("", "Caa", "Caj", "Cbb",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb",
"Gvr", "Gvv"), class = "factor"), AJ = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 10L,
1L, 10L, 10L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grj ", "Grr", "Grv",
"Gvb", "Gvr", "Gvv"), class = "factor"), JA = structure(c(10L,
10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 6L, 6L, 6L, 6L), .Label = c("", "Caa", "Caj", "Cbv",
"Cja", "Cjr", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbv", "Gja", "Gjr", "Grr", "Grv", "Gvb", "Gvv"), class = "factor"),
VR = structure(c(1L, 5L, 5L, 5L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
"Caa", "Caj", "Caj ", "Cbv", "Cja", "Cjr", "Crj", "Crr",
"Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr",
"Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
RV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 1L, 1L), .Label = c("",
"Caa", "Caj", "Cbb", "Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv",
"Cvr", "Cvv", "Cvv ", "Gaa", "Gaj", "Gbb", "Gbv", "Gja",
"Gjr", "Grj", "Grr", "Grv", "Gvr", "Gvv"), class = "factor")), .Names = c("RR",
"AA", "BB", "VV", "RJ", "JR", "BV", "VB", "AJ", "JA", "VR", "RV"
), row.names = c(NA, 20L), class = "data.frame")
如上所述,期望的矩阵将保持原有的对应关系和行顺序。GSee提供了一个我可以应用的答案,但它只能用于我的矩阵中的一列,因为`[[`只能选择单个条目,而用`[`选择多个条目不起作用。我不确定我是否正朝着正确的方向前进……
根据实际数据集(如上所示),这是所需输出(前三行)的样子:
structure(list(vv = structure(c(1L, 1L, 1L), .Label = "CRR", class = "factor"),
rv = c(NA, NA, NA), ja = structure(c(1L, 1L, 1L), .Label = "CVV", class = "factor"),
aa = structure(c(1L, 1L, 1L), .Label = "CAJ", class = "factor"),
bv = structure(c(1L, 2L, 2L), .Label = c("", "CVR"), class = "factor"),
aj = structure(c(1L, 2L, 2L), .Label = c("", "CRJ"), class = "factor"),
vb = structure(c(1L, 1L, 1L), .Label = "CAA", class = "factor"),
rj = structure(c(2L, 1L, 1L), .Label = c("", "CRJ"), class = "factor"),
rr = structure(c(1L, 1L, 1L), .Label = "CBB", class = "factor"),
vr = structure(c(1L, 1L, 1L), .Label = "CJA", class = "factor"),
bb = structure(c(1L, 1L, 1L), .Label = "CBV", class = "factor"),
jr = c(NA, NA, NA)), .Names = c("vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), class = "data.frame", row.names = c(NA,
-3L))
我希望这更有意义。
答案 0 :(得分:5)
这有点硬编码,但想法就在那里。
# library() fails loudly when a package is missing; require() only returns FALSE.
library(stringr)
library(plyr)

# Example input. The factor is requested explicitly because data.frame() no
# longer converts strings to factors by default (R >= 4.0), and the code below
# relies on levels().
vect <- data.frame(RR = c("Cvv", "Cvv", "Caa"), stringsAsFactors = TRUE)
codes <- levels(vect$RR)

# One adply() result row per distinct code: the code where it occurs in RR,
# NA elsewhere. After t(), the first row is adply's label column, so drop it;
# drop = FALSE keeps a matrix even when there is only a single code.
theMat <- t(adply(codes, .margins = 1, .fun = function(x) {
  str_extract(string = as.character(vect$RR), pattern = x)
}))[-1, , drop = FALSE]

# Column headers are the two-letter suffix of each code ("Cvv" -> "vv").
colnames(theMat) <- str_sub(codes, start = 2, end = 3)

# Swap the suffix inside every cell for the source column name ("Cvv" -> "CRR").
# Assigning through theMat[] keeps the dim and column names, which
# str_replace() would otherwise drop by returning a plain vector.
theMat[] <- str_replace(
  string = theMat,
  pattern = paste(colnames(theMat), collapse = "|"),
  replacement = "RR"
)
答案 1 :(得分:3)
好的,我来替你把代码写出来。
# `dat` is the data.frame that was created from the `dput` output in the question.
# as.matrix() turns its factor columns into a single character matrix.
m <- as.matrix(dat)
# The factor levels in the question include whitespace-padded codes such as
# "Cbb " and " "; trim them so they match their clean form and blank cells
# count as empty. Assigning through m[] keeps the dim and dimnames.
m[] <- trimws(m)
# Fix the typo/error in your data: row 10 of AJ holds "Cbb", the same code BV
# holds in that row, so both would target the same output cell.
m[10, "AJ"] <- ""
找出输出矩阵的列名,并创建该矩阵(暂时全部用NA填充):
# Cells that actually hold a code. NA is excluded explicitly: pasting an NA
# would turn it into the string "NA" and create a bogus column "A".
filled <- !is.na(m) & nzchar(m)
# Characters 2-3 of each code become the output column names, in order of
# first appearance (column-major over m).
ocn <- unique(substr(m[filled], 2, 3)) # out column names
# Character NA so the result is a character matrix even before any cell is set.
out <- matrix(NA_character_, nrow = nrow(m), ncol = length(ocn))
colnames(out) <- ocn
循环遍历每列的每一行
# Fill `out`: a cell of `m` such as "Cvv" in column "RR" becomes "CRR" in
# column "vv" of the same row. Columns are processed in order, so if two source
# columns target the same cell the later column wins.
for (i in seq_len(ncol(m))) { # for each column
  cn <- colnames(m)[i] # this will become the second 2 characters of new value
  cells <- m[, i]
  # Rows of this column that hold a code; NA is skipped explicitly because
  # nzchar(NA) is TRUE.
  rows <- which(!is.na(cells) & nzchar(cells))
  if (length(rows) == 0L) {
    next
  }
  target <- substr(cells[rows], 2, 3) # destination column of each cell
  unknown <- setdiff(target, colnames(out))
  if (length(unknown) > 0L) {
    stop(
      "no output column for code(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  # Matrix indexing by (row, column) pairs sets all cells of this column at once.
  out[cbind(rows, match(target, colnames(out)))] <-
    paste0(substr(cells[rows], 1, 1), cn)
}
out
# vv vb rr ja rj aj vr bb jr rv aa bv
# [1,] "CRR" "CAA" "CBB" "CVV" "CJR" NA "CJA" "CBV" NA NA "CAJ" NA
# [2,] "CRR" "CAA" "CBB" "CVV" "CJR" "CRJ" "CJA" "CBV" NA NA "CAJ" "CVR"
# [3,] "CRR" "CAA" "CBB" "CVV" "GJR" "CRJ" "CJA" "CBV" "CVB" NA "CAJ" "CVR"
# [4,] "GRR" NA "CBB" "CVV" "GJR" NA NA "CBV" "CVB" NA "CAJ" "CVR"
# [5,] NA NA "CBB" "CVV" "GJR" NA NA "CBV" "GVB" NA "CAJ" "GVR"
# [6,] NA NA "CBB" "CVV" "GJR" NA "CRJ" "CBV" "GVB" NA "CAJ" "GVR"
# [7,] NA NA "CBB" "CVV" "GJR" NA "CRJ" "CBV" NA NA "CAJ" "GVR"
# [8,] "CAA" NA "CBB" "CVV" "GJR" NA "CRJ" "CBV" NA NA "CAJ" "GVR"
# [9,] "GAA" NA "CBB" "CVV" "GJR" "CRV" "CRJ" "CBV" NA "CVB" NA "GVR"
# [10,] "GAA" NA "CBB" "CVV" "GJR" "CRV" "CRJ" "CBV" NA "CVB" NA "GVR"
# [11,] "GAA" NA "CBB" "CVV" "GJR" "GRV" NA "CBV" NA "GVB" NA "GVR"
# [12,] "GAA" NA "CBB" NA "GJR" "GRV" "CRJ" "CBV" NA "GVB" NA "GVR"
# [13,] "GAA" NA "CBB" NA "GJR" "GRV" "CRJ" "CBV" NA "GVB" NA "GVR"
# [14,] "GAA" NA "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA NA NA "GVR"
# [15,] "GAA" NA "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA "GVB" NA "GVR"
# [16,] "GAA" NA "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA "GVB" NA "GVR"
# [17,] "GAA" "CAJ" "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" "CJA" "GVB" NA "GVR"
# [18,] "GAA" NA "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" "CJA" "GVB" NA "GVR"
# [19,] "GAA" "CAJ" "CBB" "GVV" "GJR" NA "CRJ" "CBV" "CJA" "GVB" NA "GVR"
# [20,] NA "CAJ" "CBB" "GVV" "GJR" NA "CRJ" "CBV" "CJA" NA NA "GVR"