使用 forecast 包的 checkresiduals() 函数,并用 rbind() 合并其结果后,我得到了如下矩阵(ETS_RESIDUALS):
#Result of checkresiduals() function
[,1]
[1,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"
[2,] "Q* = 125.46, df = 18.8, p-value < 2.2e-16"
[3,] "Q* = 263.65, df = 18.8, p-value < 2.2e-16"
[4,] "Q* = 81.503, df = 18.8, p-value = 8.763e-10"
[5,] "Q* = 36.616, df = 18.8, p-value = 0.008178"
str(ETS_RESIDUALS)
#chr [1:5, 1] "Q* = 161.83, df = 18.8, p-value < 2.2e-16" "Q* = 125.46, df = 18.8, p-value < 2.2e-16" "Q* = 263.65, df = 18.8, p-value < 2.2e-16" ...
class(ETS_RESIDUALS)
#[1] "matrix"
现在,我想使用 grep() 或其他函数,把每一行文本拆分成一个 data.frame(包含四列:TEST、Q*、df、p-value),如下例所示:
TEST Q* df p-value
--------------------------------------------
TEST_1 161.83 18.8 2.2e-16
TEST_2 125.46 18.8 2.2e-16
TEST_3 263.65 18.8 2.2e-16
TEST_4 81.503 18.8 8.763e-10
TEST_5 36.616 18.8 0.008178
我尝试使用这行代码,但是效果不好。
# Attempt: back-quote every "name =" token, then wrap each string in the text
# of a data.frame() call. The result is a character vector, not a data frame.
paste0(
  "data.frame(",
  stringr::str_replace_all(ETS_RESIDUALS, "(\\S+) =", "`\\1` ="),
  ", check.names = FALSE)"
)
有人能帮我改进这段代码吗?
答案 0 :(得分:3)
library(dplyr)
library(tidyr)
library(stringr)
# Parse each "Q* = ..., df = ..., p-value <|= ..." string into one row.
data.frame(mat) %>%
  # Split on the comma together with any whitespace that follows it
  separate(mat, into = c("Q*", "df", "p-value"), sep = ",\\s*") %>%
  # Positive look-behind: keep everything after "= " or "< ", so no value
  # carries a leading blank. The extracted values stay character.
  mutate(across(everything(), ~ str_extract(.x, "(?<=[=<]\\s).*"))) %>%
  # Label the rows TEST_1, TEST_2, ... and move the label to the front
  mutate(TEST = paste0("TEST_", seq_len(n()))) %>%
  select(TEST, everything())
TEST Q* df p-value
1 TEST_1 161.83 18.8 2.2e-16
2 TEST_2 125.46 18.8 2.2e-16
3 TEST_3 263.65 18.8 2.2e-16
4 TEST_4 81.503 18.8 8.763e-10
5 TEST_5 36.616 18.8 0.008178
数据
# Sample input: a 5 x 1 character matrix, one test summary string per row.
# Each string sits on a single source line so that no line break ends up
# inside the data.
mat <- structure(
  c(
    "Q* = 161.83, df = 18.8, p-value < 2.2e-16",
    "Q* = 125.46, df = 18.8, p-value < 2.2e-16",
    "Q* = 263.65, df = 18.8, p-value < 2.2e-16",
    "Q* = 81.503, df = 18.8, p-value = 8.763e-10",
    "Q* = 36.616, df = 18.8, p-value = 0.008178"
  ),
  .Dim = c(5L, 1L)
)
答案 1 :(得分:1)
您可以先用 strsplit 拆分字符串并将结果存入 array,然后再将其转换为 data.frame 对象。
# Split every row of M into name/value pieces. Fields are separated by a comma
# plus any whitespace (so a line break after the comma is tolerated); each
# field is then split on " = " or " < ". The pieces are stored as a
# 2 x 3 x nrow(M) array: A[1, , k] holds the names and A[2, , k] the values
# of row k.
A <- array(
  apply(M, 1, function(x) {
    fields <- strsplit(x, ",\\s*")[[1]]
    unlist(strsplit(fields, "\\s[<=]\\s"))
  }),
  dim = c(2, 3, nrow(M))
)
# Convert the value row of each slice to numeric, giving one data-frame row
# per input string; the column names are taken from the first slice.
d <- setNames(
  as.data.frame(t(apply(A, 3, function(x) as.numeric(x[2, ])))),
  A[1, , 1]
)
结果
d
# Q* df p-value
# 1 161.830 18.8 2.200e-16
# 2 125.460 18.8 2.200e-16
# 3 263.650 18.8 2.200e-16
# 4 81.503 18.8 8.763e-10
# 5 36.616 18.8 8.178e-03
数据
# Sample input: a 5 x 1 character matrix, one test summary string per row.
# Each string sits on a single source line so that no line break ends up
# inside the data.
M <- structure(
  c(
    "Q* = 161.83, df = 18.8, p-value < 2.2e-16",
    "Q* = 125.46, df = 18.8, p-value < 2.2e-16",
    "Q* = 263.65, df = 18.8, p-value < 2.2e-16",
    "Q* = 81.503, df = 18.8, p-value = 8.763e-10",
    "Q* = 36.616, df = 18.8, p-value = 0.008178"
  ),
  .Dim = c(5L, 1L)
)
答案 2 :(得分:0)
一种方法是先拆分字符串,再用逻辑索引取子集。
# Split on " = ", " < ", " > " and ", " so the pieces alternate
# name, value, name, value, ...
v1 <- unlist(strsplit(m1, " = | < | > |, "))
# Even positions are the values (filled row-wise, one row per input string);
# odd positions are the names, de-duplicated to give the column names.
# The result is stored in `df` so that further columns can be added to it.
df <- setNames(
  as.data.frame(matrix(v1[c(FALSE, TRUE)], nrow = nrow(m1), byrow = TRUE)),
  unique(v1[c(TRUE, FALSE)])
)
df
# Q* df p-value
#1 161.83 18.8 2.2e-16
#2 161.83 18.8 2.2e-16
#3 161.83 18.8 2.2e-16
#4 161.83 18.8 2.2e-16
#5 161.83 18.8 2.2e-16
要创建额外的列,只需
# Label the rows TEST_1, TEST_2, ... to match the requested TEST column
df$TEST <- paste0("TEST_", seq_len(nrow(df)))
注意:所有值都相同,是因为示例矩阵中的所有字符串都相同:
[,1]
[1,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"
[2,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"
[3,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"
[4,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"
[5,] "Q* = 161.83, df = 18.8, p-value < 2.2e-16"