Question

我有三个具有不同行数的数据帧，我想创建一个新的数据帧，其中包含来自这些数据帧的随机100个值，并基于两个标准：

a - 列a和b的100个随机值

b - 前50行（1-50）的列c和d：50个配对值，每对取自数据帧2（c1和d1）的同一行

c - 后续50行列（51-100）c和d，50个配对值，发生在数据帧3（c2和d2）的同一行中

我试着用循环来实现，但效果并不好。我怎样才能做得更好？谢谢。

以下是数据和脚本以及预期结果：

# Source data: three data frames with different numbers of rows.
a <- c(4, 6, 7, 3, 2, 5, 6, 9, 6, 5, 8, 6, 7, 8, 9, 7, 6)
b <- c(40, 60, 70, 30, 20, NA, 60, 90, 60, 50, 75, 34, 42, 32, NA, 45, 29)

c1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
d1 <- c(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)

c2 <- c(11, 12, 13, 14, 15, 16, 17, 18, 19, 20)
d2 <- c(20, 19, 18, 17, 16, 15, 14, 13, 12, 11)

df1 <- data.frame(a, b)
df2 <- data.frame(c1, d1)
df3 <- data.frame(c2, d2)

# newdf (with 100 rows)
n <- 100
# Rows 1..half take their (c, d) pair from df2, the remaining rows from df3.
half <- n %/% 2

# Draw row *indices* (with replacement) instead of values, so that c and d
# in any given output row always come from the same row of the source frame.
rows_df2 <- sample(nrow(df2), half, replace = TRUE)
rows_df3 <- sample(nrow(df3), n - half, replace = TRUE)

# a and b are sampled independently of each other (plain random values);
# NA entries in df1$b can be drawn like any other value.
newdf <- data.frame(
  n = seq_len(n),
  a = sample(df1$a, n, replace = TRUE),
  b = sample(df1$b, n, replace = TRUE),
  c = c(df2$c1[rows_df2], df3$c2[rows_df3]), # first criterion, then second
  d = c(df2$d1[rows_df2], df3$d2[rows_df3])  # paired with c by row index
)

#Result 

a     b      c    d
7     60     1    10 # first row
6     50     3    8
2     90     5    6  # fifth row
.
.
.
2     90     11    20  # fifty-first row
.
.
.

Answer 1

我无法完全复现您的预期结果：示例数据中并不存在（a，b）=（7,60）这一对值。希望下面的方法接近您的需求。

library(dplyr)
# Seed the RNG so the sample is reproducible. This must be a function call:
# `set.seed = 123` would only create a variable named `set.seed`.
set.seed(123)

# use a shorter set, because it needs to match by row number
df1_short <- df1[1:10, ]

# take the sample (using 6 here because it is less than the 10 we have)
# Base row indexing keeps the original row names, which the row-name merge
# (by = 0) below relies on to line rows up with df2 / df3.
df1_sampled <- df1_short[sample(nrow(df1_short), 6), ]

# build the two halves separately, merge by row names (by = 0)
df1a <- df1_sampled[1:3, ] %>%
  merge(df2, by = 0) %>%
  rename(c = c1, d = d1)
df1b <- df1_sampled[4:6, ] %>%
  merge(df3, by = 0) %>%
  rename(c = c2, d = d2)

# combine the two sets
result <- bind_rows(df1a, df1b)
result

# Illustrative output from one random draw (the exact rows depend on the
# sample that was taken):
#   Row.names a  b  c  d
# 1         1 4 40  1 10
# 2         7 6 60  7  4
# 3         9 6 60  9  2
# 4        10 5 50 20 11
# 5         2 6 60 12 19
# 6         3 7 70 13 18

随机样本用于创建新的数据帧，使用循环或其他替代方法

1 个答案: