Question

我有一张二项事件的表格（例如输赢次数），我想把它重塑为长格式，使每个事件单独占一行。

数据看起来像这样：

 # Fake data: per-team counts of home/away wins and losses.
 # The dotted column names ("wins.home", ...) are kept as-is because the
 # reshaping code splits them on the "." to recover outcome and venue.
 team <- c("a", "b", "c", "d")
 wins.home <- c(0, 1, 0, 2)
 wins.away <- c(1, 1, 1, 1)
 loss.home <- c(0, 1, 2, 1)
 loss.away <- c(2, 1, 2, 2)
 df <- data.frame(team, wins.home, wins.away, loss.home, loss.away)
 # Total games per team: sum of every count column (all but the first, team).
 # rowSums() is the vectorised row total, replacing a per-row apply() call.
 df$games.tot <- rowSums(df[-1])


#Dataframe in WIDE format
      team  wins.home  wins.away  loss.home  loss.away  games.tot
         a         0         1         0         2         3
         b         1         1         1         1         4
         c         0         1         2         2         5
         d         2         1         1         2         6

我想将这些宽格式（WIDE）数据重塑为长格式（LONG），如下所示：

team  game where  win
a     1    away   1
a     2    away   0
a     3    away   0
b     1    home   1
b     2    away   1
b     3    home   0
b     4    away   0

编辑：比赛编号（game 列）是任意的，只是一个索引。

用 reshape2::melt 转换这些数据，可以轻松得到每行中每个类别的计数，但我不知道如何把这些计数进一步拆分成一个个二元事件。

Answer 1

可能有一个较短的解决方案，但作为一个快速解决方案：

library(stringr)
library(dplyr)
library(reshape2)

# Initial melt: one row per team/category, with the count in `value`.
# The total column is dropped by name so its position does not matter.
df2 <- melt(df[setdiff(names(df), "games.tot")], id.vars = "team")
# Split "wins.home" etc. on the literal dot. cbind() names the two new
# columns `1` (wins/loss) and `2` (home/away).
df2 <- cbind(df2, str_split_fixed(df2$variable, "\\.", 2))
# Win indicator: 1 for a "wins" category, 0 otherwise.
df2$win <- as.numeric(df2$`1` == "wins")
# Repeat each row `value` times so that every game gets its own row.
# seq_len() stays valid for an empty table, where 1:nrow(df2) is c(1, 0).
df2 <- df2[rep(seq_len(nrow(df2)), df2$value), ]
# Remove the helper columns; team, `2` and win remain.
df2$variable <- df2$value <- df2$`1` <- NULL
# Number the games within each team (the index is arbitrary), then sort.
df2 %>%
  group_by(team) %>%
  mutate(game = seq_len(n())) %>%
  arrange(team)

输出：

Source: local data frame [18 x 4]
Groups: team [4]

     team      2   win  game
   (fctr) (fctr) (dbl) (int)
1       a   away     1     1
2       a   away     0     2
3       a   away     0     3
4       b   home     1     1
5       b   away     1     2
6       b   home     0     3
7       b   away     0     4
#...and so on

Answer 2

这是另一种可供考虑的方案（不过把 melt 作为第一步更合理）：

使用 apply 系列函数创建列名的列表，再用 stack 把它转换成包含两列的 data.frame：“ind”（球队）和 “values”（列名）。

# Names of the count columns: everything except the first column (team)
# and the last one (the games total).
count_cols <- names(df)[-c(1, length(df))]
# For each row, repeat every count column's name as many times as its count.
# lapply() always returns a list. apply() would simplify the result to a
# matrix whenever all rows yield the same number of names (every team played
# equally many games), which breaks the setNames()/stack() step below.
per_team <- lapply(seq_len(nrow(df)), function(i) {
  rep(count_cols, unlist(df[i, count_cols]))
})
# Name each element after its team and stack into a two-column data.frame:
# `values` (the column name) and `ind` (the team).
temp <- stack(setNames(per_team, df[[1]]))

然后，加载我写的 “splitstackshape” 包并执行以下操作：

library(splitstackshape)

## Split "values" (e.g. "wins.home") on "." into separate columns; the
## first of them is values_1.
game_rows <- cSplit(temp, "values", ".")
## Recode the outcome by reference: "wins" = 1, others = 0.
game_rows[, values_1 := as.numeric(values_1 == "wins")]
## Add the "game" numbers, counted within each "ind" (team).
game_rows <- getanID(game_rows, "ind")
## Apply the final names we want; the trailing [] prints the result.
setnames(game_rows, c("team", "win", "where", "game"))[]
#     team win where game
#  1:    a   1  away    1
#  2:    a   0  away    2
#  3:    a   0  away    3
#  4:    b   1  home    1
#  5:    b   1  away    2
#  6:    b   0  home    3
#  7:    b   0  away    4
#  8:    c   1  away    1
#  9:    c   0  home    2
# 10:    c   0  home    3
# 11:    c   0  away    4
# 12:    c   0  away    5
# 13:    d   1  home    1
# 14:    d   1  home    2
# 15:    d   1  away    3
# 16:    d   0  home    4
# 17:    d   0  away    5
# 18:    d   0  away    6

下面是一种主要使用 “tidyr” 和 “dplyr”、并借助一点 “splitstackshape” 的方法：

library(dplyr)
library(tidyr)
library(splitstackshape)

df[setdiff(names(df), "games.tot")] %>%          ## Drop the total by name
  gather(var, val, -team) %>%                    ## A new take on melt
  expandRows("val") %>%                          ## Replicates rows by values
  separate(var, into = c("win", "where"),
           sep = "\\.") %>%                      ## Split on the literal dot only
  mutate(win = as.numeric(win == "wins")) %>%    ## Numeric version of win
  group_by(team) %>%                             ## Grouping is required
  mutate(game = seq_len(n())) %>%                ## Sequence of games per team
  ungroup() %>%                                  ## Do not leak the grouping
  arrange(team)                                  ## Final sorting

另外，为了完整起见，使用 “data.table” 的替代方案可以是这样：

library(data.table)

# Work on a data.table version of the wide data.
DT <- as.data.table(df)
# Count columns: everything except the first (team) and last (games total).
sdcols <- names(DT)[-c(1, length(DT))]

# One row per game: within each team, repeat every count column's name as
# many times as its count.
long_dt <- DT[, list(val = rep(sdcols, .SD)), by = team, .SDcols = sdcols]
# Split e.g. "wins.home" on the literal dot into outcome and venue.
long_dt[, c("win", "where") := tstrsplit(val, ".", fixed = TRUE)]
# Drop the helper column and turn the outcome into a 1/0 indicator.
long_dt[, `:=`(val = NULL, win = as.numeric(win == "wins"))]
# Number each team's games; the index itself is arbitrary.
long_dt[, game := seq_len(.N), by = team]
# The trailing [] prints the table after the by-reference updates.
long_dt[]

使用R将列中的分类数据重新整形为行中的二进制数据

2 个答案: