Question

我有以下数据结构：

     Player Team Round Question Answer 
 1:      2    1     1        1      1 
 2:      5    1     1        1      1 
 3:      8    1     1        1      1 
 4:      9    1     1        1      1
 5:     10    1     1        1      1
 6:      2    1     1        2      4
 7:      5    1     1        2      5
 8:      8    1     1        2      5 
 9:      9    1     1        2      5
10:     10    1     1        2      5 
11:      2    1     1        4      4 
12:      5    1     1        4      3 
13:      8    1     1        4      4 
14:      9    1     1        4      2 
15:     10    1     1        4      4 
16: ...

也就是说，有来自若干球队的若干球员，每人回答了若干问题。比赛总共有2轮。

我想按球队（Team）和问题（Question）分组，从数据中计算中位数和一致性系数（agreement，参见 agrmt 包）。

结果应如下所示：

    Team Question Median_R1 Agrmt_R1 Median_R2 Agrmt_R2
 1:    1        1         1     1            1        1
 2:    1        2         2     0.83         1        1
 3: ...
 4:    5       10         4     1            4        1

有人知道这是否可行？我无法找到解决方案。我可以单独解决中位数和协议系数，但不能合并？

每个提示都是受欢迎的。非常感谢你。

更新
agreement（一致性）函数返回一个介于 -1 和 1 之间的系数，各取值的含义如下：

1表示完整协议（例如，如果每个玩家回答5）。

0，如果每个玩家都有不同的答案。

-1将是，如果存在分歧（一些玩家说答案1而其他人说5）

与中位数不同，agreement 函数的输入不是原始答案，而是各答案选项的频数向量。

例如，我们有以下答案

    Player Team Round Question Answer
 6:      2    1     1        2      4
 7:      5    1     1        2      5
 8:      8    1     1        2      5
 9:      9    1     1        2      5
10:     10    1     1        2      5

功能输入如下所示：
中位数的输入：4,5,5,5,5 -> 结果：5
agreement 的输入：0,0,0,1,4 -> 结果：0.9

更新2：已解决

协议的计算可以使用以下代码完成：

# Tabulate x on the fixed 1-5 answer scale (options nobody chose count as 0)
# and pass the resulting frequency vector to agreement().
agreement(
  table(
    factor(x, levels = seq_len(5))
  )
)

最终方案基于 @sandipan 的实现。为了让各个 data.frame 的行正确对应后再合并，我额外增加了一个排序步骤：

library(agrmt)

# One row per Team/Question pair; each round's statistics are joined on below.
# NOTE(review): the snippet as posted grouped by a column named 'Party', but
# the data shown in the question calls this column 'Team' -- confirm against
# the real data.
df1 <- unique(df[c("Team", "Question")])

for (df.R in split(df, df$Round)) {
  round_id <- unique(df.R$Round)

  # Median and agreement per Team/Question within the current round.
  # agreement() expects a frequency vector, so the answers are tabulated on
  # the fixed 1-5 scale first. as.data.frame(as.list(...)) flattens the matrix
  # column returned by aggregate() into separate columns.
  df2 <- as.data.frame(as.list(aggregate(
    Answer ~ Team + Question + Round,
    df.R,
    FUN = function(x) {
      c(Median = median(x), Agrmt = agreement(table(factor(x, levels = 1:5))))
    }
  )))

  # Keep the keys plus the two statistics and label them with the round.
  df3 <- df2[c(1, 2, 4, 5)]
  names(df3)[3:4] <- paste0(c("Median_R", "Agrmt_R"), round_id)

  # Join on the keys instead of cbind()-ing by position, so rows stay aligned
  # regardless of how aggregate() orders its output; groups missing from a
  # round become NA instead of breaking the bind.
  df1 <- merge(df1, df3, by = c("Team", "Question"), all.x = TRUE)
}

df1

谢谢大家的帮助。

Answer 1

以下是三种方法：基础R aggregate，dplyr和data.table。

使用基础R aggregate：

library(agrmt)

# For every Team/Round/Question group, report the median answer and the
# agreement score computed from the answer frequencies on the 1-5 scale.
aggregate(
  Answer ~ Team + Round + Question,
  data = dat,
  FUN = function(answers) {
    freq <- table(factor(answers, levels = 1:5))
    c(Median = median(answers), Agreement = agreement(freq))
  }
)

  Team Round Question Answer.Median Answer.Agreement
1    1     1        1           1.0              1.0
2    1     1        2           5.0              0.9
3    1     1        4           4.0              0.7

使用dplyr：

library(dplyr)

# Median answer and agreement score for each Team/Round/Question group.
# Answers are tabulated on the fixed 1-5 scale so that agreement() receives
# a frequency vector that includes zero counts.
dat.summary <- summarise(
  group_by(dat, Team, Round, Question),
  Median = median(Answer),
  Agreement = agreement(table(factor(Answer, levels = 1:5)))
)

   Team Round Question Median Agreement
1     1     1        1      1       1.0
2     1     1        2      5       0.9
3     1     1        4      4       0.7

使用data.table：

library(data.table)

# setDT() converts dat to a data.table; the j expression then computes the
# median and the agreement score within each Team/Round/Question group.
dat.summary <- setDT(dat)[
  ,
  .(
    Median = median(Answer),
    Agreement = agreement(table(factor(Answer, levels = 1:5)))
  ),
  by = .(Team, Round, Question)
]

   Team Round Question Median Agreement
1:    1     1        1      1       1.0
2:    1     1        2      5       0.9
3:    1     1        4      4       0.7

要获得“宽”数据框作为最终输出：

在上面的示例中，我将输出保留为“long”格式。如果要重新整形为“宽”格式，以便每个Round获得自己的一组列，您可以执行以下操作：

首先，让我们通过堆叠样本数据的另一个副本，为样本数据添加第二轮：

library(dplyr)
library(reshape2)
library(agrmt)

# Fake a second round: copy the sample data with Round set to 2 and stack the
# copy underneath the original rows.
dat <- bind_rows(dat, mutate(dat, Round = 2))

现在使用我们之前在dplyr示例中使用的相同代码计算中位数和协议：

# Long-format summary: one row per Team/Round/Question with the median answer
# and the agreement score of the answer frequencies (1-5 scale).
dat.summary <- dat %>%
  group_by(Team, Round, Question) %>%
  summarise(
    Median = median(Answer),
    Agreement = agreement(table(factor(Answer, levels = 1:5)))
  )

最后，重塑为宽幅格式。这需要首先“融合”数据以将Median和Agreement列堆叠到单个列中，然后转换为宽格式。我们还包括第二行代码，为每个Round添加“Round”，以便我们在宽数据框中获得所需的列名：

# Reshape the long summary so that every round gets its own Median and
# Agreement columns (e.g. Median_Round1, Agreement_Round2).
dat.summary <- dat.summary %>%
  # The summary may still be grouped (summarise() keeps the outer grouping
  # variables); drop the grouping before rewriting Round and handing the data
  # to reshape2.
  ungroup() %>%
  # Prefix the round number so the wide column names read "..._Round1".
  mutate(Round = paste0("Round", Round)) %>%
  # Stack Median and Agreement into a single variable/value pair ...
  melt(id.vars = c("Team", "Question", "Round")) %>%
  # ... then spread them out again, one column per statistic and round.
  dcast(Team + Question ~ variable + Round, value.var = "value")

  Team Question Median_Round1 Median_Round2 Agreement_Round1 Agreement_Round2
1    1        1             1             1              1.0              1.0
2    1        2             5             5              0.9              0.9
3    1        4             4             4              0.7              0.7

Answer 2

我想你想要的东西如下，对吧？

df
    Player Team Round Question Answer
1:       2    1     1        1      1
2:       5    1     1        1      1
3:       8    1     1        1      1
4:       9    1     1        1      1
5:      10    1     1        1      1
6:       2    1     1        2      4
7:       5    1     1        2      5
8:       8    1     1        2      5
9:       9    1     1        2      5
10:     10    1     1        2      5
11:      2    1     1        4      4
12:      5    1     1        4      3
13:      8    1     1        4      4
14:      9    1     1        4      2
15:     10    1     1        4      4
16:      2    1     2        1      2
17:      5    1     2        1      3
18:      8    1     2        1      4
19:      2    1     2        2      5
20:      5    1     2        2      3
21:      8    1     2        2      1
22:      2    1     2        4      6
23:      5    1     2        4      1
24:      8    1     2        4      5

library(agrmt)
# Build one row per Team/Question pair and add a Median_R<n>/Agrmt_R<n> column
# pair for every round.
#
# NOTE: agreement() expects a frequency vector, not the raw answers, so the
# answers are tabulated on the fixed 1-5 scale first. Answers outside 1-5
# (such as the 6 in the sample data) are not counted in that table. The sample
# output printed below this snippet was produced with agreement(x) on the raw
# answers and therefore differs from what this corrected code returns.
df1 <- unique(df[c("Team", "Question")])

for (df.R in split(df, df$Round)) {
  round_id <- unique(df.R$Round)

  # as.data.frame(as.list(...)) flattens the matrix column returned by
  # aggregate() into separate Median/Agrmt columns (positions 4 and 5).
  df2 <- as.data.frame(as.list(aggregate(
    Answer ~ Team + Question + Round,
    df.R,
    FUN = function(x) {
      c(Median = median(x), Agrmt = agreement(table(factor(x, levels = 1:5))))
    }
  )))[c(1, 2, 4, 5)]
  names(df2)[3:4] <- paste0(c("Median_R", "Agrmt_R"), round_id)

  # Join on the keys instead of cbind()-ing by position: aggregate() returns
  # its rows in its own order, which need not match the order of df1.
  df1 <- merge(df1, df2, by = c("Team", "Question"), all.x = TRUE)
}

df1
    Team Question Median_R1    Agrmt_R1 Median_R2   Agrmt_R2
1:     1        1         1  0.00000000         3  0.2222222
6:     1        2         5  0.04166667         3  0.4444444
11:    1        4         4 -0.05882353         5 -0.5833333

R：如何为多个组创建中值和协议函数

2 个答案: