我想对一个 R 数据框进行重采样,但在处理分类变量时遇到了问题。我并不是随机抽取若干行作为子样本,而是生成新的行,使每个新变量与对应的原始变量具有相同的分布。问题出现在分类变量上,具体如下:
> head(x0)
Symscore1 Symscore2 exercise3 exerciseduration3 groupchange age3
3 1 0 1 0 Transitional to Menopausal 52
4 0 0 5 2 Transitional to Menopausal 62
6 0 0 2 0 Transitional to Menopausal 54
8 0 0 5 3 Transitional to Menopausal 56
10 0 0 4 3 Transitional to Menopausal 59
13 0 1 4 3 Transitional to Menopausal 55
packyears bmi3 education3
3 2.357143 23.24380 Basic
4 2.000000 16.76574 University
6 1.000000 23.30668 Basic
8 1.428571 22.14533 University
10 1.428571 22.14533 University
13 0.000000 22.03857 University
> xa = as.data.frame(sapply(X = x0, FUN = sample))
> head(xa)
Symscore1 Symscore2 exercise3 exerciseduration3 groupchange age3 packyears
1 1 0 2 3 4 49 53.571430
2 0 0 3 0 3 46 2.142857
3 1 0 3 3 4 49 4.000000
4 0 1 3 3 4 58 0.000000
5 0 0 2 0 1 57 0.000000
6 0 0 3 0 1 47 26.871429
bmi3 education3
1 25.84777 2
2 21.25850 2
3 25.79592 3
4 23.93899 1
5 25.97012 2
6 23.53037 2
> X = rbind(x0,xa)
Warning messages:
1: In `[<-.factor`(`*tmp*`, ri, value = c(4, 3, 4, 4, 1, 1, 2, 4, 4, :
invalid factor level, NA generated
2: In `[<-.factor`(`*tmp*`, ri, value = c(2, 2, 3, 1, 2, 2, 3, 2, 2, :
invalid factor level, NA generated
>
答案 0 :(得分:1)
你可以尝试:
# Shuffle every column of x0 independently of the others.
# Assigning into x2[] keeps x0's data-frame structure, and lapply() returns a
# list of columns, so a factor column stays a factor with its levels intact.
x2 <- x0
# Permute by index rather than calling sample(col) directly: sample() treats a
# single number n >= 1 as the range 1:n, which would give wrong values for a
# one-row data frame. For longer columns the result is the same shuffle.
x2[] <- lapply(x0, FUN = function(col) col[sample.int(length(col))])
x2
# Output from one run (the shuffle is random, so your values will differ):
# Symscore1 Symscore2 exercise3 exerciseduration3 groupchange
#3 0 0 1 0 Transitional to Menopausal
#4 0 0 5 3 Transitional to Menopausal
#6 0 0 4 3 Transitional to Menopausal
#8 0 0 2 0 Transitional to Menopausal
#10 1 1 4 3 Transitional to Menopausal
#13 0 0 5 2 Transitional to Menopausal
# age3
#3 54
#4 59
#6 52
#8 56
#10 62
#13 5
# Both data frames now have matching column types and factor levels, so
# stacking them no longer produces "invalid factor level" warnings.
rbind(x0, x2)
# Example data used by the shuffling snippet: six rows that keep their
# original row labels, five integer columns and one single-level factor.
# NOTE(review): the last age3 value is 5L here, while the question's printout
# shows 55 for row 13 -- kept exactly as given in the answer; confirm.
x0 <- data.frame(
  Symscore1 = c(1L, 0L, 0L, 0L, 0L, 0L),
  Symscore2 = c(0L, 0L, 0L, 0L, 0L, 1L),
  exercise3 = c(1L, 5L, 2L, 5L, 4L, 4L),
  exerciseduration3 = c(0L, 2L, 0L, 3L, 3L, 3L),
  groupchange = factor(rep("Transitional to Menopausal", 6L)),
  age3 = c(52L, 62L, 54L, 56L, 59L, 5L),
  row.names = c("3", "4", "6", "8", "10", "13")
)