编辑

Question

如何在各组中统计唯一（不重复）的观测值？下面是一个可复现的示例。

# Example data: 14 rows, control group only.
# N repeats the ids 111 (x4) and 112 (x3) once per `char` level, so each
# category ("bad", "good") contains the same two distinct ids.
mydata <- data.frame(
  N = rep(c(111L, 111L, 111L, 111L, 112L, 112L, 112L), times = 2),
  group = factor(rep("control group", 14), levels = "control group"),
  char = factor(rep(c("bad", "good"), each = 7), levels = c("bad", "good"))
)

我需要按char变量统计N中的唯一观测值。N是囚犯编号，char表示行为良好（good）或不良（bad）。因此，我需要分别按good和bad两个类别统计不重复的囚犯总数。数据中有对照组（control）和测试组（test）两组，这里我只给出了对照组。可以看到，这里的唯一观测值是111和112。

这里是我想要的输出

    number of unique   prisoners for control group
bad     2
good    2

如何执行？

编辑

# Example data: 28 rows, the 14-row control-group layout followed by the same
# layout for the test group. Within each group, ids 111 (x4) and 112 (x3)
# appear once under "bad" and once under "good".
mydata <- data.frame(
  N = rep(c(111L, 111L, 111L, 111L, 112L, 112L, 112L), times = 4),
  group = factor(
    rep(c("control group", "test group"), each = 14),
    levels = c("control group", "test group")
  ),
  char = factor(
    rep(rep(c("bad", "good"), each = 7), times = 2),
    levels = c("bad", "good")
  )
)

输出按组划分

     control group test group
bad    2             2
good    2             2

Answer 1

使用data.table和dplyr软件包：

library(data.table)
library(dplyr)

# One row per level of `char`; `Unique` is the number of distinct values of N
# (prisoner ids) found among that level's rows.
summarise(
  group_by(mydata, char),
  Unique = uniqueN(N)
)

或者，针对您最后补充的问题（按组分别输出）：

library(data.table)
library(dplyr)

# Format the number of distinct values in `ids` as "<count>(<pct>%)",
# where pct = 100 * count / denom, printed with two decimals.
# NOTE(review): the original snippet divided by a hard-coded 100, which makes
# the percentage equal to the raw count. That default is kept here so the
# printed format is unchanged -- confirm the intended denominator (e.g. the
# total number of distinct prisoners in the group) and pass it as `denom`.
count_with_pct <- function(ids, denom = 100) {
  n_ids <- uniqueN(ids)
  paste0(n_ids, "(", formatC(100 * n_ids / denom, format = "f", digits = 2), "%)")
}

# One row per level of `char`, with one column per study group.
# `N` and `group` are evaluated inside the current `char` group, so each cell
# counts only the ids of that behaviour category. The original subset the
# global `mydata` instead, which ignored `group_by(char)`, and used
# 'control group' for both columns.
mydata %>%
  group_by(char) %>%
  summarise(
    Control = count_with_pct(N[group == "control group"]),
    Test = count_with_pct(N[group == "test group"])
  )

在R

编辑

1 个答案: