我有以下数据框:
# Example data: ten observations of two numeric measurements (x, y) and a
# three-level grouping factor (fac).  The listing printed below additionally
# shows a hand-written manual_assignment column that is not stored in this
# object.
df <- data.frame(
  x = c(
    0.389794300700167, -1.20807617542949, -0.363676017470862,
    -1.62667268170309, -0.256478394123992, 1.10177950308713,
    0.755781508027337, -0.238233556018718, 0.98744470341339,
    0.741390128383824
  ),
  y = c(
    0.0893472664958216, -0.954943856152377, -0.195150384667239,
    0.92552126209408, 0.482978524836611, -0.596310636720207,
    -2.18528683816953, -0.674865937875116, -2.11906119191017,
    -1.2651980215309
  ),
  fac = factor(
    c("B", "A", "B", "C", "A", "A", "A", "A", "B", "B"),
    levels = c("A", "B", "C")
  )
)
df
#> x y fac manual_assignment
#> 1 0.3897943 0.08934727 B b.x
#> 2 -1.2080762 -0.95494386 A a.y
#> 3 -0.3636760 -0.19515038 B b.y
#> 4 -1.6266727 0.92552126 C c.y
#> 5 -0.2564784 0.48297852 A a.y
#> 6 1.1017795 -0.59631064 A a.x
#> 7 0.7557815 -2.18528684 A a.x
#> 8 -0.2382336 -0.67486594 A a.x
#> 9 0.9874447 -2.11906119 B b.x
#> 10 0.7413901 -1.26519802 B b.x
我想根据 x 和 y 两列的比较结果,按 fac 分组统计行数。对于每一行,如果 x 的值大于 y,就把该行所属 fac 水平的 x.count 加 1,否则把 y.count 加 1。最终结果应如下所示:
x.count y.count
A 3 2 (# a.y)
B 3 1
C 0 1
我怎样才能实现这一目标?可以使用dplyr吗?
答案 0 :(得分:2)
使用 table:
# Cross-tabulate fac against a label recording whether x or y is the larger
# value in each row; dnn keeps the dimension names as "fac" and "" (unnamed).
table(
  df$fac,
  ifelse(df$x > df$y, "x.count", "y.count"),
  dnn = c("fac", "")
)
#fac x.count y.count
# A 3 2
# B 3 1
# C 0 1
使用 dplyr/tidyr 则需要多写几行代码:
library(tidyverse)

# Count rows per (fac, measure) pair, where measure labels which of x / y is
# larger (rows with x <= y fall under "y.count"), then widen to one column per
# measure.
# - as_tibble() makes the result print as a tibble whatever class df has.
# - count() replaces group_by() + tally() and returns an ungrouped result.
# - pivot_wider() replaces the superseded spread(); values_fill = 0L fills
#   combinations that never occur (e.g. fac C with "x.count") with an integer
#   zero, so the count columns stay integer.
df %>%
  as_tibble() %>%
  count(fac, measure = if_else(x > y, "x.count", "y.count")) %>%
  pivot_wider(names_from = measure, values_from = n, values_fill = 0L)
## A tibble: 3 × 3
#  fac   x.count y.count
#  <fct>   <int>   <int>
#1 A           3       2
#2 B           3       1
#3 C           0       1
答案 1 :(得分:1)
使用 dplyr,我们可以按 fac 分组,然后分别统计 x 值更大的行数和 y 值更大的行数。
library(dplyr)

# For each level of fac, count the rows where x exceeds y (x.count) and all
# remaining rows (y.count).
# Ties (x == y) are assigned to y.count, following the "x > y, otherwise y"
# rule; testing `x < y` instead would leave tied rows out of both columns.
# sum() over a logical vector counts its TRUE values and returns an integer;
# na.rm = TRUE skips rows whose comparison is NA.
df %>%
  group_by(fac) %>%
  summarise(x.count = sum(x > y, na.rm = TRUE),
            y.count = sum(x <= y, na.rm = TRUE))
# fac x.count y.count
# <fctr> <int> <int>
#1 A 3 2
#2 B 3 1
#3 C 0 1
答案 2 :(得分:1)
使用 data.table,可以这样写:
library(data.table)

# setDT() converts df to a data.table by reference.  Rows are counted (.N) per
# combination of fac and a measure label ("x.count" when x > y, otherwise
# "y.count"); dcast() then spreads the labels into columns, filling
# combinations that do not occur with 0.
dcast(
  setDT(df)[, .N, by = .(fac, measure = ifelse(x > y, "x.count", "y.count"))],
  fac ~ measure,
  fill = 0
)
# fac x.count y.count
#1: A 3 2
#2: B 3 1
#3: C 0 1