将4列融合(melt)为3列,同时合并两个variable列

时间:2016-10-06 10:12:43

标签: r dataframe reshape

给定以下示例data.frame:

# Ten consecutive daily dates, 2016-09-01 through 2016-09-10.
Date <- seq(
  from = as.Date("2016/9/1"),
  to = as.Date("2016/9/10"),
  by = "days"
)

# Two measured series: 10 values each, drawn from 0:200 and 0:400.
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)

# Constant limit that belongs to each series.
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)

# Wide layout: one row per date, one column per series and per limit.
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)

>        Date   A   B A_limit B_limit
1  2016-09-01 175 270     200     400
2  2016-09-02 160  50     200     400
3  2016-09-03 173  25     200     400
...

我希望将其重塑为以下形式:

>        Date limit variable value
1  2016-09-01   200        A   175
2  2016-09-02   200        A   160
3  2016-09-03   200        A   173
...
31 2016-09-01   400        B   270
32 2016-09-02   400        B    50
33 2016-09-03   400        B    25
....

我设法完成它,但在我看来,我的方式很复杂:

library("reshape2")

# Step 1: melt the two limit columns. Date, A and B stay as id columns, so
# `variable` holds "A_limit"/"B_limit" and `value` holds the limit itself.
# melt() is called directly because reshape2 does not provide `%>%`; the
# piped form only works when magrittr or dplyr happens to be attached.
data_sample_2 <- melt(data_sample, id.vars = c("Date", "A", "B"))

# Step 2: relabel the limit names so they match the series names. The new
# levels must exist before they can be assigned into the factor.
levels(data_sample_2$variable) <- c(levels(data_sample_2$variable), "A", "B")
data_sample_2$variable[data_sample_2$variable == "A_limit"] <- "A"
data_sample_2$variable[data_sample_2$variable == "B_limit"] <- "B"

# Step 3: free up the default `variable`/`value` names for the second melt.
names(data_sample_2)[names(data_sample_2) == "value"] <- "limit"
names(data_sample_2)[names(data_sample_2) == "variable"] <- "variable_1"

# Step 4: melt the measurement columns A and B (everything that is not an id).
data_sample_3 <- melt(data_sample_2, id.vars = c("Date", "variable_1", "limit"))

# Drop the now-unused "A_limit"/"B_limit" levels so that `variable_1` and
# `variable` share the same level set, which `==` on two factors requires.
data_sample_3 <- droplevels(data_sample_3)

# Step 5: the second melt paired every limit with every series; keep only the
# rows where the limit's series matches the measurement's series.
data_sample_4 <- data_sample_3[
  data_sample_3$variable_1 == data_sample_3$variable,
]

# The helper column is redundant once the rows have been filtered.
data_sample_4$variable_1 <- NULL

我刚刚开始使用reshape2包,所以请让我知道我可以改进这个data.frame转换的任何方式(无论它看起来多么明显)。

3 个答案:

答案 0 :(得分:1)

这是你想要的吗?

# Melt only the measurement columns; Date and both limit columns are kept as
# id columns, so every long row still carries A_limit and B_limit.
data_sample_2 <- melt(
  data_sample,
  id.vars = c("Date", "A_limit", "B_limit")
)

# Pick the limit that belongs to the series named in `variable`.
data_sample_2$limit <- with(
  data_sample_2,
  ifelse(variable == "A", A_limit, B_limit)
)

# Show the four requested columns in the requested order.
data_sample_2[c("Date", "limit", "variable", "value")]

答案 1 :(得分:1)

你可以只用基础R(base R)的stack函数把各列堆叠起来,即

# Stack the measurements and their limits separately with base R's stack(),
# each computed once and selected by column name rather than by position.
# Both calls stack their columns in the same A-then-B order, so row i of one
# result belongs to row i of the other.
measures <- stack(data_sample[c("A", "B")])
limits <- stack(data_sample[c("A_limit", "B_limit")])

df1 <- data.frame(
  # Repeat the dates explicitly, one copy per stacked series, instead of
  # relying on data.frame() recycling the shorter column.
  Date = rep(data_sample$Date, length.out = nrow(measures)),
  limit = limits$values,
  # stack() returns the source column name as the factor `ind`.
  variable = measures$ind,
  value = measures$values
)

head(df1)
#        Date  limit variable value
#1 2016-09-01    200   A       67
#2 2016-09-02    200   A      100
#3 2016-09-03    200   A      166
#4 2016-09-04    200   A      116
#5 2016-09-05    200   A       89
#6 2016-09-06    200   A      138

tail(df1)
#         Date  limit variable value
#15 2016-09-05    400   B      208
#16 2016-09-06    400   B      387
#17 2016-09-07    400   B      125
#18 2016-09-08    400   B      116
#19 2016-09-09    400   B      120
#20 2016-09-10    400   B      241

答案 2 :(得分:1)

由于您在示例中使用了reshape2,因此您可能会对如何在(更新的)tidyverse设置中处理它感兴趣。

我会重复你的代码:

# Daily dates covering 2016-09-01 to 2016-09-10 (10 rows).
Date <- seq(
  from = as.Date("2016/9/1"),
  to = as.Date("2016/9/10"),
  by = "days"
)

# Randomly drawn measurements for the two series.
A <- sample(0:200, size = 10)
B <- sample(0:400, size = 10)

# Fixed limit for each series, repeated once per row.
A_limit <- rep(200, times = 10)
B_limit <- rep(400, times = 10)

# Assemble the wide table used in the rest of the example.
data_sample <- data.frame(
  Date = Date,
  A = A,
  B = B,
  A_limit = A_limit,
  B_limit = B_limit
)

# Preview
head(data_sample)
#>         Date   A   B A_limit B_limit
#> 1 2016-09-01  39  53     200     400
#> 2 2016-09-02  96 193     200     400
#> 3 2016-09-03 143  75     200     400
#> 4 2016-09-04  60 241     200     400
#> 5 2016-09-05 126 225     200     400
#> 6 2016-09-06 184 349     200     400

现在,我们可以使用dplyr和tidyr(它们承担了reshape2的大部分职责)以“整洁”(tidy)的方式处理数据。

library(dplyr)
library(tidyr)

# Reshape to long form: A and B become key/value pairs, each row gets the
# limit belonging to its series, and only the four output columns are kept.
# NOTE(review): gather() is superseded in current tidyr; pivot_longer() is the
# maintained replacement, but it orders rows differently, so it is not a
# drop-in swap for the output shown below.
data_clean <- data_sample %>%
  gather(key = "variable", value = "value", A, B) %>%
  mutate(
    limit = if_else(variable == "A", true = A_limit, false = B_limit)
  ) %>%
  select(Date, limit, variable, value)

# Inspect results
head(data_clean)
#>         Date limit variable value
#> 1 2016-09-01   200        A    39
#> 2 2016-09-02   200        A    96
#> 3 2016-09-03   200        A   143
#> 4 2016-09-04   200        A    60
#> 5 2016-09-05   200        A   126
#> 6 2016-09-06   200        A   184