我想创建一个新列，其中每一行的值都是从该行其他列的值中随机抽取的一个子集。
# Example data: 57 rows by 6 columns of standard-normal draws, each rounded
# to one decimal place.
# replicate() evaluates the rnorm() call six times, once per column and in
# column order, and returns a 57 x 6 numeric matrix. data.frame() names the
# columns of an unnamed matrix X1..X6, so no NA placeholder matrix and no
# pipe/mutate() step is needed to build the frame.
df <- data.frame(
  replicate(6, round(rnorm(n = 57, mean = 0, sd = 1), 1))
)
# my failed attempt at a new column
# NOTE(review): two things stand out in the call below.
#  - `df[, ...]` indexes the whole data frame, not the current row of the
#    rowwise() pipeline, so str_c() is handed three full columns.
#  - `sep = ", "` sits outside the closing parenthesis of str_c(), so it is
#    passed to mutate() as a second expression rather than to str_c().
# Assumes dplyr and stringr are attached; no library() call is visible here.
df %>%
rowwise() %>%
mutate(X7 = str_c(df[, sample(1:6, 3, replace = F)]), sep = ", ")
答案 0 :(得分:2)
以下解决方案使用 `tidyverse`。关键是将数据逐行拆分，并对每个单行子集应用一个函数，从该行的值中随机抽样。`map_df` 可以完成上述任务，并将所有输出合并为一个数据框。`df2` 是最终输出。
# Load package
library(tidyverse)
# Set seed so the random draws (and the sampling further down) are reproducible
set.seed(123)
# Create example data frame: 57 rows by 6 columns of standard-normal draws,
# each rounded to one decimal place.
# replicate() evaluates the rnorm() call six times, once per column and in
# column order, so the random stream is consumed column by column. It returns
# a 57 x 6 numeric matrix, and data.frame() names the columns of an unnamed
# matrix X1..X6, so no NA placeholder matrix or mutate() step is required.
df <- data.frame(
  replicate(6, round(rnorm(n = 57, mean = 0, sd = 1), 1))
)
# Process the data
# Every row is tagged with an id and split off into its own one-row data
# frame. For each piece, three of the six value columns are chosen at random
# (without replacement) and pasted into a single ", "-separated string column
# named X7. map_df() stacks the per-row results back into one data frame.
df2 <- df %>%
  rowid_to_column() %>%
  split(f = .$rowid) %>%
  map_df(function(one_row) {
    # Drop the helper id so that positions 1..6 refer to the value columns.
    values_only <- select(one_row, -rowid)
    three_cols <- select(values_only, sample(1:6, 3, replace = FALSE))
    unite(three_cols, X7, everything(), sep = ", ")
  }) %>%
  # Attach the original columns, then reorder so that X7 comes last.
  bind_cols(df) %>%
  select(paste0("X", 1:7))
df2
X1 X2 X3 X4 X5 X6 X7
1 -0.6 0.6 0.5 0.1 0.9 0.1 0.1, 0.5, 0.9
2 -0.2 0.1 0.3 0.0 -1.0 0.2 0.1, 0.3, 0.2
3 1.6 0.2 0.1 2.1 2.0 1.6 1.6, 2.1, 0.1
4 0.1 0.4 -0.6 -0.7 -0.1 -0.2 0.1, 0.4, -0.6
5 0.1 -0.5 -0.8 -1.1 0.2 0.2 0.1, 0.2, -0.5
6 1.7 -0.3 -1.0 0.0 -0.7 1.2 -1, -0.7, -0.3
7 0.5 -1.0 0.1 0.3 -0.6 1.1 0.5, -0.6, -1
...
答案 1 :(得分:1)
我认为最好的方法是使用基础 R 函数 `replicate`、`sample` 和 `sapply`。
# For every row of df, draw three distinct column positions out of the six
# value columns. After t(), row i of `inx` holds the positions chosen for
# row i of df.
inx <- t(replicate(nrow(df), sample(1:6, 3, replace = FALSE)))
# Paste the three selected values of each row into one ", "-separated string.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# character vector with exactly one string per row.
df$X7 <- vapply(
  seq_len(nrow(df)),
  function(i) paste(df[i, inx[i, ]], collapse = ", "),
  character(1)
)
答案 2 :(得分:1)
这是使用 `dplyr` 的解决方案：
library(dplyr)
# Give every row its own group id so that do() runs once per row.
# seq_len() is used rather than seq() because seq(0) yields c(1, 0), which
# does not match the length of a data frame with no rows.
df %>%
  group_by(idx = seq_len(n())) %>%
  do({
    # Drop the helper id so that only the value columns are sampled from.
    res <- select(., -idx)
    # Draw three of the row's values without replacement and join them into
    # one ", "-separated string stored in the new column X7.
    bind_cols(res, X7 = toString(sample(unlist(res), 3, replace = FALSE)))
  }) %>%
  ungroup() %>%
  select(-idx)
结果:
# A tibble: 57 x 7
X1 X2 X3 X4 X5 X6 X7
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 0.4 0.4 -0.1 3.4 0.9 -0.4 0.4, 0.9, 0.4
2 1.5 0.9 -0.7 1.5 -1.1 -0.3 -0.7, 1.5, -1.1
3 -0.1 -0.5 -0.6 -0.8 -0.3 2.3 -0.3, 2.3, -0.8
4 0.7 -1.0 0.3 0.2 -0.5 -0.3 -1, 0.3, -0.3
5 0.6 0.9 0.4 1.9 -0.7 -2.0 0.4, -2, 0.9
6 0.3 0.7 1.3 0.6 1.3 -0.2 0.7, -0.2, 1.3
7 0.5 0.3 1.1 -0.2 -0.4 -0.8 0.5, 1.1, 0.3
8 0.4 -1.9 0.8 -0.6 -1.1 0.4 0.4, -1.9, -0.6
9 0.2 -1.5 -1.9 1.0 0.0 0.6 0, 1, 0.6
10 -0.2 0.7 -0.5 1.4 0.3 -0.1 -0.2, 0.3, -0.5