我有三个具有不同行数的数据帧,我想创建一个新的数据帧,其中包含来自这些数据帧的随机100个值,并基于两个标准:
a - 列a和b的100个随机值
b - 前50行的列c和d取50个配对值,即出现在数据帧2(c1和d1)同一行中的值
c - 后续50行(51-100)的列c和d取50个配对值,即出现在数据帧3(c2和d2)同一行中的值
我尝试用for循环来实现,但结果不对。我怎样才能做得更好?谢谢。
以下是数据和脚本以及预期结果:
# Source data: three data frames with different numbers of rows.
a <- c(4, 6, 7, 3, 2, 5, 6, 9, 6, 5, 8, 6, 7, 8, 9, 7, 6)
b <- c(40, 60, 70, 30, 20, NA, 60, 90, 60, 50, 75, 34, 42, 32, NA, 45, 29)
c1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
d1 <- c(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
c2 <- c(11, 12, 13, 14, 15, 16, 17, 18, 19, 20)
d2 <- c(20, 19, 18, 17, 16, 15, 14, 13, 12, 11)
df1 <- data.frame(a, b)
df2 <- data.frame(c1, d1)
df3 <- data.frame(c2, d2)

# newdf (with 100 rows): columns a and b are independent random draws from
# df1; columns c and d are paired draws, taken from df2 for the first half of
# the rows and from df3 for the second half.
n <- 100
half <- n %/% 2

# Sample row *indices* rather than values. Indexing both columns with the same
# index vector keeps c and d paired (same source row); sampling each column
# separately would break the pairing. sample.int() also avoids the sample()
# pitfall where a length-one numeric vector x is treated as 1:x.
rows_a <- sample.int(nrow(df1), n, replace = TRUE)
rows_b <- sample.int(nrow(df1), n, replace = TRUE)
rows_df2 <- sample.int(nrow(df2), half, replace = TRUE)
rows_df3 <- sample.int(nrow(df3), n - half, replace = TRUE)

newdf <- data.frame(
  n = seq_len(n),
  a = df1$a[rows_a], # random value
  b = df1$b[rows_b], # random value
  c = c(df2$c1[rows_df2], df3$c2[rows_df3]), # rows 1-50: df2, 51-100: df3
  d = c(df2$d1[rows_df2], df3$d2[rows_df3]) # same rows as c, so paired
)
#Result
a b c d
7 60 1 10 # first row
6 50 3 8
2 90 5 6 # fifth row
.
.
.
2 90 11 20 # fifty-first row
.
.
.
答案 0 :(得分:0)
我无法完全复现您的预期结果:示例数据中不存在 (a, b) = (7, 60) 这一配对。希望下面的代码接近您的需求。
# Seed the RNG so the sample below is reproducible. This has to be a call:
# `set.seed = 123` would only create a variable named `set.seed`.
set.seed(123)
library(dplyr)

# use a shorter set, because it needs to match by row number
df1_short <- df1[1:10, ]

# take the sample (using 6 here because it is less than the 10 we have)
df1_sampled <- df1_short %>% sample_n(6)

# build the two halves separately, merge by row names (by = 0)
# NOTE(review): this relies on sample_n() keeping df1's original row names so
# that each sampled row is matched with the same-numbered row of df2/df3 --
# confirm this holds for the installed dplyr version.
df1a <- df1_sampled[1:3, ] %>%
  merge(df2, by = 0) %>%
  rename(c = c1, d = d1)
df1b <- df1_sampled[4:6, ] %>%
  merge(df3, by = 0) %>%
  rename(c = c2, d = d2)

# combine the two sets
result <- bind_rows(df1a, df1b)
result
# Example output as originally posted (that run was not actually seeded, so a
# run with set.seed(123) will generally show different rows):
# Row.names a b c d
# 1 1 4 40 1 10
# 2 7 6 60 7 4
# 3 9 6 60 9 2
# 4 10 5 50 20 11
# 5 2 6 60 12 19
# 6 3 7 70 13 18