我正在尝试(有效地)重新排列R中的数据帧。
我的数据是从两个参与者群体(1或0,即疾病和对照组)的四个不同实验中收集的实验数据。
示例数据框:
Subject type Experiment 1 Experiment 2 Experiment 3 Experiment 4
0 4.6 2.5 1.4 5.3
0 4.7 2.4 1.8 5.1
1 3.5 1.2 5.6 7.5
1 3.8 1.7 6.2 8.1
我想重新安排我的数据帧,使其结构如下(原因是,当我们在R中构造这样的结构时,它使我更容易在数据上运行函数):
Subject type Experiment Measure
0 1 4.6
0 2 2.5
0 3 1.4
0 4 5.3
0 1 4.7
0 2 2.4
0 3 1.8
0 4 5.1
1 1 3.5
1 2 1.2
1 3 5.6
1 4 7.5
1 1 3.8
1 2 1.7
1 3 6.2
1 4 8.1
正如你所看到的,现在每个受试者占据四行;每一行对应一次测量,而不是一个受试者。这(至少目前)让我更方便把数据传入R函数。也许以后我会想办法完全跳过这一步,但我是R的新手,这似乎是最好的做法。
无论如何 - 问题是,进行此数据帧转换的最有效方法是什么?目前我这样做:
# Input dframe1: one row per subject, one column per experiment
dframe1 <- structure(list(subject_type = c(0L, 0L, 1L, 1L), experiment_1 = c(4.6,
4.7, 3.5, 3.8), experiment_2 = c(2.5, 2.4, 1.2, 1.7), experiment_3 = c(1.4,
1.8, 5.6, 6.2), experiment_4 = c(5.3, 5.1, 7.5, 8.1)), .Names = c("subject_type",
"experiment_1", "experiment_2", "experiment_3", "experiment_4"
), class = "data.frame", row.names = c(NA, -4L))

# Names of the measurement columns, in experiment order
experiment_cols <- paste0("experiment_", 1:4)
n_experiments <- length(experiment_cols)

# Preallocate a numeric matrix with one row per (subject, experiment) pair
temporary_matrix <- matrix(
  NA_real_,
  nrow = nrow(dframe1) * n_experiments,
  ncol = 3,
  dimnames = list(NULL, c("subject_type", "experiment", "measure"))
)

# Rearrange dframe1 so that each measurement gets its own row.
# seq_len() (unlike 1:nrow()) yields an empty sequence for a zero-row input,
# so the loop body is skipped instead of indexing out of bounds.
for (i in seq_len(nrow(dframe1))) {
  for (j in seq_len(n_experiments)) {
    # Rows for subject i occupy a contiguous run of n_experiments rows
    out_row <- (i - 1) * n_experiments + j
    temporary_matrix[out_row, "subject_type"] <- dframe1$subject_type[i]
    temporary_matrix[out_row, "experiment"] <- j
    temporary_matrix[out_row, "measure"] <- dframe1[[experiment_cols[j]]][i]
  }
}

# Convert matrix to a data frame
dframe2 <- data.frame(temporary_matrix)
# NOTE: with this numeric matrix, `measure` is already a double; the
# round-trip below is kept from the original as a safeguard and does not
# change the values here.
dframe2$measure <- as.double(as.character(dframe2$measure))
肯定有更好的方法吧?!
答案 0 :(得分:5)
使用reshape2包,这非常简单。
library(reshape2)
# `dat` is assumed to be your data.frame; melt it into long format,
# keeping "Subject type" as the identifier column
melt(dat, id.vars = "Subject type")
如果您愿意,还可以让结果更精致一些:
# Melt to long format, naming the key and value columns explicitly
newdat <- melt(
  dat,
  id.vars = "Subject type",
  variable.name = "Experiment",
  value.name = "Measure"
)
# Strip the "Experiment" prefix (and any whitespace after it) from the
# labels, then convert what remains to numeric
newdat[["Experiment"]] <- as.numeric(
  gsub("Experiment\\s*", "", as.character(newdat[["Experiment"]]))
)
答案 1 :(得分:4)
基础R的reshape方法:
获取数据:
# Example input: one row per subject, one column per experiment
dframe1 <- data.frame(
  subject_type = c(0L, 0L, 1L, 1L),
  experiment_1 = c(4.6, 4.7, 3.5, 3.8),
  experiment_2 = c(2.5, 2.4, 1.2, 1.7),
  experiment_3 = c(1.4, 1.8, 5.6, 6.2),
  experiment_4 = c(5.3, 5.1, 7.5, 8.1)
)
设置要堆叠的变量:
# Names of the columns to be stacked: experiment_1 ... experiment_4
expandvars <- paste0("experiment_", seq_len(4))
重新塑造!
# Stack the experiment columns into long format: one output row per
# (input row, experiment column) pair, with the measurement in `value`
dfrm1res <- reshape(
  dframe1,
  idvar = "subject_type",
  varying = list(expandvars),
  v.names = "value",
  direction = "long",
  # Explicit row names are supplied (presumably because subject_type repeats
  # across rows, so names built from idvar and time would collide - confirm);
  # their count is derived from the data rather than hard-coded as 1:16
  new.row.names = seq_len(nrow(dframe1) * length(expandvars))
)
结果:
> dfrm1res
subject_type time value
1 0 1 4.6
2 0 1 4.7
3 1 1 3.5
4 1 1 3.8
5 0 2 2.5
6 0 2 2.4
7 1 2 1.2
8 1 2 1.7
9 0 3 1.4
10 0 3 1.8
11 1 3 5.6
12 1 3 6.2
13 0 4 5.3
14 0 4 5.1
15 1 4 7.5
16 1 4 8.1
答案 2 :(得分:4)
# Stack columns 2 to 5 into `values`/`ind`; subject_type (length 4) is
# recycled by data.frame() to line up with the 16 stacked rows
data.frame(subject_type = dframe1[["subject_type"]], stack(dframe1[2:5]))
subject_type values ind
1 0 4.6 experiment_1
2 0 4.7 experiment_1
3 1 3.5 experiment_1
4 1 3.8 experiment_1
5 0 2.5 experiment_2
6 0 2.4 experiment_2
7 1 1.2 experiment_2
8 1 1.7 experiment_2
9 0 1.4 experiment_3
10 0 1.8 experiment_3
11 1 5.6 experiment_3
12 1 6.2 experiment_3
13 0 5.3 experiment_4
14 0 5.1 experiment_4
15 1 7.5 experiment_4
16 1 8.1 experiment_4
或者使用基础R的reshape(不过看起来我偏好的用法与thelatemail的不同):
# Give every row its own subject id so that (subject_type, subject)
# identifies a row; derived from the data instead of hard-coding 1:4
dframe1$subject <- seq_len(nrow(dframe1))
# Columns 2 to 5 are the experiment_<n> measurements; stack them into a
# single exp_value column
reshape(
  dframe1,
  direction = "long",
  idvar = c("subject_type", "subject"),
  varying = 2:5,
  sep = "_",
  v.names = "exp_value"
)
#--------------------------
subject_type subject time exp_value
0.1.1 0 1 1 4.6
0.2.1 0 2 1 4.7
1.3.1 1 3 1 3.5
1.4.1 1 4 1 3.8
0.1.2 0 1 2 2.5
0.2.2 0 2 2 2.4
1.3.2 1 3 2 1.2
1.4.2 1 4 2 1.7
0.1.3 0 1 3 1.4
0.2.3 0 2 3 1.8
1.3.3 1 3 3 5.6
1.4.3 1 4 3 6.2
0.1.4 0 1 4 5.3
0.2.4 0 2 4 5.1
1.3.4 1 3 4 7.5
1.4.4 1 4 4 8.1