使用tidyr的spread将数据从长格式转换为宽格式

时间:2017-05-17 11:53:44

标签: r tidyverse

假设我有以下数据,表示由成分A、B、C中至多两种成分组成的混合物:

# Example data: every row describes one mixture.
# var1/var2 name the components ("-" marks an absent second component),
# val1/val2 hold the value that belongs to var1/var2 respectively.
# The surrounding parentheses print the data frame right after assignment.
(dat <- data.frame(
  var1 = c(rep("A", 5), rep("B", 3), "C"),
  var2 = c("-", "B", "B", "C", "C", "-", "C", "C", "-"),
  val1 = c(100, 25, 50, 25, 50, 100, 25, 50, 100),
  val2 = c(0, 75, 50, 75, 50, 0, 75, 50, 0),
  stringsAsFactors = FALSE
))

#   var1 var2 val1 val2
# 1    A    -  100    0
# 2    A    B   25   75
# 3    A    B   50   50
# 4    A    C   25   75
# 5    A    C   50   50
# 6    B    -  100    0
# 7    B    C   25   75
# 8    B    C   50   50
# 9    C    -  100    0

我现在想转换这些数据:希望得到名为A、B、C的列,分别给出每种成分的含量:

#     A   B   C
# 1 100   0   0
# 2  25  75   0
# 3  50  50   0
# 4  25   0  75
# 5  50   0  50
# 6   0 100   0
# 7   0  25  75
# 8   0  50  50
# 9   0   0 100

如何使用tidyr解决这个问题?可以使用spread和unite的任意组合。

4 个答案:

答案 0 :(得分:2)

我确信有更优雅的方法可以做到这一点,但你可以这样做:

library(dplyr)
library(tidyr)
# Stack the (var1, val1) and (var2, val2) pairs into one long table, tagging
# each entry with the row of `dat` it came from, then spread it back to wide
# form with one column per component.
wideDf <- data.frame(
  # seq_len() stays empty for a zero-row `dat`; 1:nrow(dat) would give c(1, 0).
  id = rep(seq_len(nrow(dat)), times = 2),
  var = c(dat$var1, dat$var2),
  val = c(dat$val1, dat$val2)
) %>%
  # "-" only marks an absent second component; it must not become a column.
  filter(var != "-") %>%
  # Components that do not occur in a row are filled with 0.
  tidyr::spread(key = var, value = val, fill = 0)

给你:

> wideDf
#   id   A   B   C
# 1  1 100   0   0
# 2  2  25  75   0
# 3  3  50  50   0
# 4  4  25   0  75
# 5  5  50   0  50
# 6  6   0 100   0
# 7  7   0  25  75
# 8  8   0  50  50
# 9  9   0   0 100

答案 1 :(得分:1)

我会分两部分处理,然后将结果加在一起

library("tidyverse")
# Spread each (name, value) pair separately and add the two wide tables.
#
# rowid_to_column() prepends an integer row key, so spread() keeps one output
# row per input row in the original numeric order. The character keys from
# rownames_to_column() sort as "1", "10", "2", ... once there are 10+ rows.
v1 <- dat %>%
  rowid_to_column() %>%
  spread(key = var1, value = val1, fill = 0) %>%
  select(A, B, C)

# var2 never contains "A" in this data, so the "-" column stands in for A.
# That column only holds the val2 entries of the "-" rows (all 0 here).
v2 <- dat %>%
  rowid_to_column() %>%
  spread(key = var2, value = val2, fill = 0) %>%
  select(A = `-`, B, C)

v1 + v2

答案 2 :(得分:1)

如果您想使用reshape2

# Integer row key: character row names would be sorted as "1", "10", "2", ...
# by dcast() once `dat` has 10 or more rows.
dat$id <- seq_len(nrow(dat))
local({
  # Stack (id, var1, val1) on top of (id, var2, val2) under common names.
  # stats::setNames() does the renaming, so data.table::setnames() (which
  # this snippet never loaded) is not required.
  long <- rbind(
    dat[, c("id", "var1", "val1")],
    setNames(dat[, c("id", "var2", "val2")], c("id", "var1", "val1"))
  )
  wide <- reshape2::dcast(long, id ~ var1, value.var = "val1", fill = 0)
  # Drop the placeholder column "-" by name instead of by position.
  wide[, names(wide) != "-", drop = FALSE]
})

#   id   A   B   C
# 1  1 100   0   0
# 2  2  25  75   0
# 3  3  50  50   0
# 4  4  25   0  75
# 5  5  50   0  50
# 6  6   0 100   0
# 7  7   0  25  75
# 8  8   0  50  50
# 9  9   0   0 100

答案 3 :(得分:0)

这是一种不完全健壮的方法,但似乎适用于您的示例。也许你可以用它作为更好解决方案的灵感。

# Build a numeric matrix with one row per row of `dat` and one column per
# component, filled via (row, column) matrix indexing. Unlike apply() over the
# data frame, this never coerces the values to character and does not depend
# on the sort order of the component names.
local({
  # Every component named in var1/var2; "-" only marks an absent component.
  components <- sort(setdiff(unique(c(dat$var1, dat$var2)), "-"))
  wide <- matrix(
    0,
    nrow = nrow(dat),
    ncol = length(components),
    dimnames = list(NULL, components)
  )
  row_idx <- seq_len(nrow(dat))
  # Handle the (var1, val1) and (var2, val2) pairs the same way.
  for (pair in list(c("var1", "val1"), c("var2", "val2"))) {
    col_idx <- match(dat[[pair[1]]], components)
    # Names that are not components (i.e. "-") have no target column.
    known <- !is.na(col_idx)
    wide[cbind(row_idx[known], col_idx[known])] <- dat[[pair[2]]][known]
  }
  wide
})

        A   B   C
 [1,] 100   0   0
 [2,]  25  75   0
 [3,]  50  50   0
 [4,]  25   0  75
 [5,]  50   0  50
 [6,]   0 100   0
 [7,]   0  25  75
 [8,]   0  50  50
 [9,]   0   0 100